Home | History | Annotate | Download | only in platform
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  *           (C) 1999 Antti Koivisto (koivisto (at) kde.org)
      4  *           (C) 2001 Dirk Mueller (mueller (at) kde.org)
      5  *           (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      6  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Library General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Library General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Library General Public License
     19  * along with this library; see the file COPYING.LIB.  If not, write to
     20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  * Boston, MA 02110-1301, USA.
     22  */
     23 
     24 #include "config.h"
     25 #include "KURL.h"
     26 #include "LinkHash.h"
     27 #include "PlatformString.h"
     28 #include <wtf/text/AtomicString.h>
     29 #include <wtf/text/StringHash.h>
     30 #include <wtf/text/StringImpl.h>
     31 
     32 namespace WebCore {
     33 
     34 static inline size_t findSlashDotDotSlash(const UChar* characters, size_t length, size_t position)
     35 {
     36     if (length < 4)
     37         return notFound;
     38     size_t loopLimit = length - 3;
     39     for (size_t i = position; i < loopLimit; ++i) {
     40         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
     41             return i;
     42     }
     43     return notFound;
     44 }
     45 
     46 static inline size_t findSlashSlash(const UChar* characters, size_t length, size_t position)
     47 {
     48     if (length < 2)
     49         return notFound;
     50     size_t loopLimit = length - 1;
     51     for (size_t i = position; i < loopLimit; ++i) {
     52         if (characters[i] == '/' && characters[i + 1] == '/')
     53             return i;
     54     }
     55     return notFound;
     56 }
     57 
     58 static inline size_t findSlashDotSlash(const UChar* characters, size_t length, size_t position)
     59 {
     60     if (length < 3)
     61         return notFound;
     62     size_t loopLimit = length - 2;
     63     for (size_t i = position; i < loopLimit; ++i) {
     64         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
     65             return i;
     66     }
     67     return notFound;
     68 }
     69 
     70 static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
     71 {
     72     if (length < 3)
     73         return false;
     74     unsigned loopLimit = length - 2;
     75     for (unsigned i = 0; i < loopLimit; ++i) {
     76         if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/')
     77             return true;
     78     }
     79     return false;
     80 }
     81 
     82 static inline void squeezeOutNullCharacters(Vector<UChar, 512>& string)
     83 {
     84     size_t size = string.size();
     85     size_t i = 0;
     86     for (i = 0; i < size; ++i) {
     87         if (!string[i])
     88             break;
     89     }
     90     if (i == size)
     91         return;
     92     size_t j = i;
     93     for (++i; i < size; ++i) {
     94         if (UChar character = string[i])
     95             string[j++] = character;
     96     }
     97     ASSERT(j < size);
     98     string.shrink(j);
     99 }
    100 
    101 static void cleanSlashDotDotSlashes(Vector<UChar, 512>& path, size_t firstSlash)
    102 {
    103     size_t slash = firstSlash;
    104     do {
    105         size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound;
    106         // Don't remove the host, i.e. http://foo.org/../foo.html
    107         if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) {
    108             path[slash] = 0;
    109             path[slash + 1] = 0;
    110             path[slash + 2] = 0;
    111         } else {
    112             for (size_t i = previousSlash; i < slash + 3; ++i)
    113                 path[i] = 0;
    114         }
    115         slash += 3;
    116     } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound);
    117     squeezeOutNullCharacters(path);
    118 }
    119 
    120 static void mergeDoubleSlashes(Vector<UChar, 512>& path, size_t firstSlash)
    121 {
    122     size_t refPos = find(path.data(), path.size(), '#');
    123     if (!refPos || refPos == notFound)
    124         refPos = path.size();
    125 
    126     size_t slash = firstSlash;
    127     while (slash < refPos) {
    128         if (!slash || path[slash - 1] != ':')
    129             path[slash++] = 0;
    130         else
    131             slash += 2;
    132         if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound)
    133             break;
    134     }
    135     squeezeOutNullCharacters(path);
    136 }
    137 
    138 static void cleanSlashDotSlashes(Vector<UChar, 512>& path, size_t firstSlash)
    139 {
    140     size_t slash = firstSlash;
    141     do {
    142         path[slash] = 0;
    143         path[slash + 1] = 0;
    144         slash += 2;
    145     } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound);
    146     squeezeOutNullCharacters(path);
    147 }
    148 
    149 static inline void cleanPath(Vector<UChar, 512>& path)
    150 {
    151     // FIXME: Should not do this in the query or anchor part of the URL.
    152     size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0);
    153     if (firstSlash != notFound)
    154         cleanSlashDotDotSlashes(path, firstSlash);
    155 
    156     // FIXME: Should not do this in the query part.
    157     firstSlash = findSlashSlash(path.data(), path.size(), 0);
    158     if (firstSlash != notFound)
    159         mergeDoubleSlashes(path, firstSlash);
    160 
    161     // FIXME: Should not do this in the query or anchor part.
    162     firstSlash = findSlashDotSlash(path.data(), path.size(), 0);
    163     if (firstSlash != notFound)
    164         cleanSlashDotSlashes(path, firstSlash);
    165 }
    166 
    167 static inline bool matchLetter(UChar c, UChar lowercaseLetter)
    168 {
    169     return (c | 0x20) == lowercaseLetter;
    170 }
    171 
    172 static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
    173 {
    174     if (length < 6)
    175         return false;
    176     if (!matchLetter(characters[0], 'h')
    177             || !matchLetter(characters[1], 't')
    178             || !matchLetter(characters[2], 't')
    179             || !matchLetter(characters[3], 'p'))
    180         return false;
    181     if (!(characters[4] == ':'
    182             || (matchLetter(characters[4], 's') && characters[5] == ':')))
    183         return false;
    184 
    185     unsigned pos = characters[4] == ':' ? 5 : 6;
    186 
    187     // Skip initial two slashes if present.
    188     if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
    189         pos += 2;
    190 
    191     // Find next slash.
    192     while (pos < length && characters[pos] != '/')
    193         ++pos;
    194 
    195     return pos == length;
    196 }
    197 
    198 static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length)
    199 {
    200     return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length));
    201 }
    202 
    203 LinkHash visitedLinkHash(const UChar* url, unsigned length)
    204 {
    205     return visitedLinkHashInline(url, length);
    206 }
    207 
    208 static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
    209 {
    210     if (attributeURL.isNull())
    211         return;
    212 
    213     const UChar* characters = attributeURL.characters();
    214     unsigned length = attributeURL.length();
    215 
    216     // This is a poor man's completeURL. Faster with less memory allocation.
    217     // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
    218     // For example, it does not handle international domain names properly.
    219 
    220     // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
    221     //    1) The "://" could be in the query or anchor.
    222     //    2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
    223 
    224     // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
    225     // have a query or anchor.
    226 
    227     bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
    228 
    229     if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
    230         buffer.append(attributeURL.characters(), attributeURL.length());
    231         return;
    232     }
    233 
    234 
    235     if (hasColonSlashSlash) {
    236         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
    237         // end of the path, *before* the query or anchor.
    238         buffer.append(characters, length);
    239         buffer.append('/');
    240         return;
    241     }
    242 
    243     if (!length)
    244         buffer.append(base.string().characters(), base.string().length());
    245     else {
    246         switch (characters[0]) {
    247             case '/':
    248                 buffer.append(base.string().characters(), base.pathStart());
    249                 break;
    250             case '#':
    251                 buffer.append(base.string().characters(), base.pathEnd());
    252                 break;
    253             default:
    254                 buffer.append(base.string().characters(), base.pathAfterLastSlash());
    255                 break;
    256         }
    257     }
    258     buffer.append(characters, length);
    259     cleanPath(buffer);
    260     if (needsTrailingSlash(buffer.data(), buffer.size())) {
    261         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
    262         // end of the path, *before* the query or anchor.
    263         buffer.append('/');
    264     }
    265 
    266     return;
    267 }
    268 
    269 void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
    270 {
    271     return visitedURLInline(base, attributeURL, buffer);
    272 }
    273 
    274 LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
    275 {
    276     Vector<UChar, 512> url;
    277     visitedURLInline(base, attributeURL, url);
    278     if (url.isEmpty())
    279         return 0;
    280 
    281     return visitedLinkHashInline(url.data(), url.size());
    282 }
    283 
    284 }  // namespace WebCore
    285