Home | History | Annotate | Download | only in platform
      1 /*
      2  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef KURL_h
     27 #define KURL_h
     28 
     29 #include "PlatformString.h"
     30 #include "URLString.h"
     31 #include <wtf/HashMap.h>
     32 
     33 #if USE(CF)
     34 typedef const struct __CFURL* CFURLRef;
     35 #endif
     36 
     37 #if PLATFORM(MAC)
     38 #ifdef __OBJC__
     39 @class NSURL;
     40 #else
     41 class NSURL;
     42 #endif
     43 #endif
     44 
     45 #if PLATFORM(QT)
     46 QT_BEGIN_NAMESPACE
     47 class QUrl;
     48 QT_END_NAMESPACE
     49 #endif
     50 
     51 #if USE(GOOGLEURL)
     52 #include "KURLGooglePrivate.h"
     53 #endif
     54 
     55 #if USE(JSC)
     56 #include <runtime/UString.h>
     57 #endif
     58 
     59 namespace WebCore {
     60 
// Forward declarations. In this header TextEncoding is only used as a
// reference parameter and KURLHash only as a typedef target, so the full
// definitions are not needed here.
class TextEncoding;
struct KURLHash;

// String-to-String map; this is the output parameter type of
// KURL::copyParsedQueryTo().
typedef HashMap<String, String> ParsedURLParameters;

// Tag type. Its single enumerator, ParsedURLString, is passed as the first
// argument to select the KURL constructors that take an absolute URL string.
enum ParsedURLStringTag { ParsedURLString };
     67 
// KURL stores a URL string and answers questions about its components
// (protocol, host, port, user, password, path, query, fragment).
//
// The representation depends on USE(GOOGLEURL): either a KURLGooglePrivate
// member (m_url), or m_string plus the integer fields declared at the end of
// the class.
// NOTE(review): those integers are presumably offsets into m_string marking
// where each component ends -- confirm against the parser implementation.
class KURL {
public:
    // Generates a URL which contains a null string.
    KURL() { invalidate(); }

    // The argument is an absolute URL string. The string is assumed to be the output of KURL::string() called on a valid
    // KURL object, or indiscernible from such.
    // It is usually best to avoid repeatedly parsing a string, unless the memory savings outweigh the possible slow-downs.
    KURL(ParsedURLStringTag, const char*);
    KURL(ParsedURLStringTag, const String&);
    KURL(ParsedURLStringTag, const URLString&);
    // Constructs the special "deleted" value by forwarding
    // WTF::HashTableDeletedValue to the underlying storage member.
#if USE(GOOGLEURL)
    KURL(WTF::HashTableDeletedValueType) : m_url(WTF::HashTableDeletedValue) { }
#else
    KURL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
#endif
    // True if the underlying string is the hash table deleted value.
    bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); }

    // Resolves the relative URL with the given base URL. If provided, the
    // TextEncoding is used to encode non-ASCII characters. The base URL can be
    // null or empty, in which case the relative URL will be interpreted as
    // absolute.
    // FIXME: If the base URL is invalid, this always creates an invalid
    // URL. Instead I think it would be better to treat all invalid base URLs
    // the same way we treat null and empty base URLs.
    KURL(const KURL& base, const String& relative);
    KURL(const KURL& base, const String& relative, const TextEncoding&);


#if USE(GOOGLEURL)
    // For conversions for other structures that have already parsed and
    // canonicalized the URL. The input must be exactly what KURL would have
    // done with the same input.
    KURL(const CString& canonicalSpec,
         const url_parse::Parsed& parsed, bool isValid);
#endif

    // NOTE(review): presumably returns the URL text with the parts that must
    // not appear in a Referer header removed -- confirm in the implementation.
    String strippedForUseAsReferrer() const;

    // FIXME: The above functions should be harmonized so that passing a
    // base of null or the empty string gives the same result as the
    // standard String constructor.

    // Makes a deep copy. Helpful only if you need to use a KURL on another
    // thread.  Since the underlying StringImpl objects are immutable, there's
    // no other reason to ever prefer copy() over plain old assignment.
    KURL copy() const;

    bool isNull() const;
    bool isEmpty() const;
    bool isValid() const;

    // Returns true if this URL has a path. Note that "http://foo.com/" has a
    // path of "/", so this function will return true. Only invalid or
    // non-hierarchical (like "javascript:") URLs will have no path.
    bool hasPath() const;

    // Returns true if you can set the host and port for the URL.
    // Non-hierarchical URLs don't have a host and port.
    bool canSetHostOrPort() const { return isHierarchical(); }

    // Returns true if you can set the path for the URL; like
    // canSetHostOrPort(), this is true exactly when the URL is hierarchical.
    bool canSetPathname() const { return isHierarchical(); }

    // string() returns a reference to the stored URL string; urlString()
    // wraps that same string in a URLString.
#if USE(GOOGLEURL)
    const String& string() const { return m_url.string(); }
    URLString urlString() const { return URLString(m_url.string()); }
#else
    const String& string() const { return m_string; }
    URLString urlString() const { return URLString(m_string); }
#endif

    // Component accessors.
    String protocol() const;
    String host() const;
    unsigned short port() const;
    bool hasPort() const;
    String user() const;
    String pass() const;
    String path() const;
    String lastPathComponent() const;
    String query() const;
    String fragmentIdentifier() const;
    bool hasFragmentIdentifier() const;

    // Fills the given ParsedURLParameters map.
    // NOTE(review): presumably with the name/value pairs of the query
    // string -- confirm in the implementation.
    void copyParsedQueryTo(ParsedURLParameters&) const;

    String baseAsString() const;

    String prettyURL() const;
    String fileSystemPath() const;

    // Returns true if the current URL's protocol is the same as the null-
    // terminated ASCII argument. The argument must be lower-case.
    bool protocolIs(const char*) const;
    // Shorthand for protocolIs("data").
    bool protocolIsData() const { return protocolIs("data"); }
    bool protocolInHTTPFamily() const;
    bool isLocalFile() const;

    // Unlike the other setters, setProtocol() reports a bool result.
    // NOTE(review): presumably false when the protocol is rejected -- confirm.
    bool setProtocol(const String&);
    void setHost(const String&);

    void removePort();
    void setPort(unsigned short);

    // Input is like "foo.com" or "foo.com:8000".
    void setHostAndPort(const String&);

    void setUser(const String&);
    void setPass(const String&);

    // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
    // will be "/".
    void setPath(const String&);

    // The query may begin with a question mark, or, if not, one will be added
    // for you. Setting the query to the empty string will leave a "?" in the
    // URL (with nothing after it). To clear the query, pass a null string.
    void setQuery(const String&);

    void setFragmentIdentifier(const String&);
    void removeFragmentIdentifier();

    // These two free functions are granted access to the private members.
    friend bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);

    friend bool protocolHostAndPortAreEqual(const KURL&, const KURL&);

    // Positions of component boundaries, returned as unsigned values.
    // NOTE(review): presumably indices into string() -- confirm.
    unsigned hostStart() const;
    unsigned hostEnd() const;

    unsigned pathStart() const;
    unsigned pathEnd() const;
    unsigned pathAfterLastSlash() const;
    // Implicit conversion to the URL string; equivalent to calling string().
    operator const String&() const { return string(); }

#if USE(CF)
    // Conversions to and from CoreFoundation URLs.
    KURL(CFURLRef);
    CFURLRef createCFURL() const;
#endif

#if PLATFORM(MAC)
    // Conversions to and from NSURL.
    KURL(NSURL*);
    operator NSURL*() const;
#endif
#ifdef __OBJC__
    // Objective-C only: converts by way of string().
    operator NSString*() const { return string(); }
#endif

#if PLATFORM(QT)
    // Conversions to and from QUrl.
    KURL(const QUrl&);
    operator QUrl() const;
#endif

#if USE(GOOGLEURL)
    // Getters for the parsed structure and its corresponding 8-bit string.
    const url_parse::Parsed& parsed() const { return m_url.m_parsed; }
    const CString& utf8String() const { return m_url.utf8String(); }
#endif

#ifndef NDEBUG
    // Only declared when NDEBUG is not defined (debug builds).
    void print() const;
#endif

private:
    // Called by the default constructor to put the object into its initial
    // state.
    void invalidate();
    // Backs canSetHostOrPort() and canSetPathname().
    bool isHierarchical() const;
    static bool protocolIs(const String&, const char*);
#if USE(GOOGLEURL)
    friend class KURLGooglePrivate;
    void parse(const char* url, const String* originalString);  // KURLMac calls this.
    void copyToBuffer(Vector<char, 512>& buffer) const;  // KURLCFNet uses this.
    // All URL state for the GoogleURL-backed build.
    KURLGooglePrivate m_url;
#else  // !USE(GOOGLEURL)
    void init(const KURL&, const String&, const TextEncoding&);
    void copyToBuffer(Vector<char, 512>& buffer) const;

    // Parses the given URL. The originalString parameter allows for an
    // optimization: When the source is the same as the fixed-up string,
    // it will use the passed-in string instead of allocating a new one.
    void parse(const String&);
    void parse(const char* url, const String* originalString);

    // The URL text returned by string().
    String m_string;
    // One-bit flags returned by isValid() and protocolInHTTPFamily().
    bool m_isValid : 1;
    bool m_protocolInHTTPFamily : 1;

    // Component boundary markers, in URL order.
    // NOTE(review): presumably each is the index in m_string at which the
    // named component ends (m_userStart being the one start marker) --
    // confirm against parse().
    int m_schemeEnd;
    int m_userStart;
    int m_userEnd;
    int m_passwordEnd;
    int m_hostEnd;
    int m_portEnd;
    int m_pathAfterLastSlash;
    int m_pathEnd;
    int m_queryEnd;
    int m_fragmentEnd;
#endif
};
    264 
// Equality and inequality between two KURLs, and between a KURL and a String
// in either operand order. The inline definitions later in this header
// compare the string() values.
bool operator==(const KURL&, const KURL&);
bool operator==(const KURL&, const String&);
bool operator==(const String&, const KURL&);
bool operator!=(const KURL&, const KURL&);
bool operator!=(const KURL&, const String&);
bool operator!=(const String&, const KURL&);

// Component-wise comparisons; both are also declared as friends of KURL.
bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);
bool protocolHostAndPortAreEqual(const KURL&, const KURL&);

// Returns a reference to a KURL object.
// NOTE(review): presumably a shared "about:blank" URL -- confirm in the
// implementation file.
const KURL& blankURL();
    276 
// Functions to do URL operations on strings.
// These are operations that aren't faster on a parsed URL.
// They also differ from the KURL member functions in that they don't require the string to be a valid, parsable URL.
// This is especially important because valid javascript URLs are not necessarily considered valid by KURL.

// NOTE(review): KURL::protocolIs() requires a lower-case protocol argument;
// presumably the same holds for the string version -- confirm.
bool protocolIs(const String& url, const char* protocol);
bool protocolIsJavaScript(const String& url);

bool isDefaultPortForProtocol(unsigned short port, const String& protocol);
bool portAllowed(const KURL&); // Blacklist ports that should never be used for Web resources.

bool isValidProtocol(const String&);

String mimeTypeFromDataURL(const String& url);

// Unescapes the given string using URL escaping rules, given an optional
// encoding (defaulting to UTF-8 otherwise). DANGER: If the URL has "%00"
// in it, the resulting string will have embedded null characters!
String decodeURLEscapeSequences(const String&);
String decodeURLEscapeSequences(const String&, const TextEncoding&);

// NOTE(review): presumably the inverse direction of
// decodeURLEscapeSequences() -- confirm the exact escaping rules in the
// implementation.
String encodeWithURLEscapeSequences(const String&);
    299 
    300 // Inlines.
    301 
    302 inline bool operator==(const KURL& a, const KURL& b)
    303 {
    304     return a.string() == b.string();
    305 }
    306 
    307 inline bool operator==(const KURL& a, const String& b)
    308 {
    309     return a.string() == b;
    310 }
    311 
    312 inline bool operator==(const String& a, const KURL& b)
    313 {
    314     return a == b.string();
    315 }
    316 
    317 inline bool operator!=(const KURL& a, const KURL& b)
    318 {
    319     return a.string() != b.string();
    320 }
    321 
    322 inline bool operator!=(const KURL& a, const String& b)
    323 {
    324     return a.string() != b;
    325 }
    326 
    327 inline bool operator!=(const String& a, const KURL& b)
    328 {
    329     return a != b.string();
    330 }
    331 
    332 #if !USE(GOOGLEURL)
    333 
    334 // Inline versions of some non-GoogleURL functions so we can get inlining
    335 // without having to have a lot of ugly ifdefs in the class definition.
    336 
    337 inline bool KURL::isNull() const
    338 {
    339     return m_string.isNull();
    340 }
    341 
    342 inline bool KURL::isEmpty() const
    343 {
    344     return m_string.isEmpty();
    345 }
    346 
    347 inline bool KURL::isValid() const
    348 {
    349     return m_isValid;
    350 }
    351 
    352 inline bool KURL::hasPort() const
    353 {
    354     return m_hostEnd < m_portEnd;
    355 }
    356 
    357 inline bool KURL::protocolInHTTPFamily() const
    358 {
    359     return m_protocolInHTTPFamily;
    360 }
    361 
    362 inline unsigned KURL::hostStart() const
    363 {
    364     return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
    365 }
    366 
    367 inline unsigned KURL::hostEnd() const
    368 {
    369     return m_hostEnd;
    370 }
    371 
    372 inline unsigned KURL::pathStart() const
    373 {
    374     return m_portEnd;
    375 }
    376 
    377 inline unsigned KURL::pathEnd() const
    378 {
    379     return m_pathEnd;
    380 }
    381 
    382 inline unsigned KURL::pathAfterLastSlash() const
    383 {
    384     return m_pathAfterLastSlash;
    385 }
    386 
    387 #endif  // !USE(GOOGLEURL)
    388 
    389 } // namespace WebCore
    390 
namespace WTF {

    // KURLHash is the default hash for KURL: this specialization makes
    // DefaultHash<WebCore::KURL>::Hash name WebCore::KURLHash.
    // The primary DefaultHash template is only forward-declared here.
    template<typename T> struct DefaultHash;
    template<> struct DefaultHash<WebCore::KURL> {
        typedef WebCore::KURLHash Hash;
    };

} // namespace WTF
    400 
    401 #endif // KURL_h
    402