Home | History | Annotate | Download | only in platform
      1 /*
      2  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef KURL_h
     27 #define KURL_h
     28 
     29 #include "PlatformString.h"
     30 #include "URLString.h"
     31 #include <wtf/HashMap.h>
     32 
     33 #if USE(CF)
     34 typedef const struct __CFURL* CFURLRef;
     35 #endif
     36 
     37 #if PLATFORM(MAC)
     38 #ifdef __OBJC__
     39 @class NSURL;
     40 #else
     41 class NSURL;
     42 #endif
     43 #endif
     44 
     45 #if PLATFORM(QT)
     46 QT_BEGIN_NAMESPACE
     47 class QUrl;
     48 QT_END_NAMESPACE
     49 #endif
     50 
     51 #if USE(GOOGLEURL)
     52 #include "KURLGooglePrivate.h"
     53 #endif
     54 
     55 #if USE(JSC)
     56 #include <runtime/UString.h>
     57 #endif
     58 
     59 namespace WebCore {
     60 
// Forward declarations: only references to these types are needed in this header.
class TextEncoding;
struct KURLHash;

// String-to-String map; the out-parameter type of KURL::copyParsedQueryTo().
typedef HashMap<String, String> ParsedURLParameters;

// Tag type that selects the KURL constructors taking an absolute URL string
// that is assumed to already be in the form KURL::string() would produce.
enum ParsedURLStringTag { ParsedURLString };
     67 
// KURL wraps a URL string and exposes accessors and mutators for its parts
// (protocol, host, port, user, password, path, query, fragment identifier).
// Depending on USE(GOOGLEURL), the state is held either in a KURLGooglePrivate
// member (m_url) or in m_string plus the integer position members
// m_schemeEnd ... m_fragmentEnd declared at the bottom of the class.
class KURL {
public:
    // Generates a URL which contains a null string.
    KURL() { invalidate(); }

    // The argument is an absolute URL string. The string is assumed to be output of KURL::string() called on a valid
    // KURL object, or indiscernible from such.
    // It is usually best to avoid repeatedly parsing a string, unless memory savings outweigh the possible slow-downs.
    KURL(ParsedURLStringTag, const char*);
    KURL(ParsedURLStringTag, const String&);
    KURL(ParsedURLStringTag, const URLString&);

    // Constructs the value that marks a deleted hash table slot.
    // NOTE(review): in the non-GOOGLEURL build only m_string is initialized
    // here; the validity bits and position members are left uninitialized.
    // Confirm nothing reads them on such a value.
#if USE(GOOGLEURL)
    KURL(WTF::HashTableDeletedValueType) : m_url(WTF::HashTableDeletedValue) { }
#else
    KURL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
#endif
    // Returns true if the underlying string is the hash-table-deleted marker.
    bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); }

    // Resolves the relative URL with the given base URL. If provided, the
    // TextEncoding is used to encode non-ASCII characters. The base URL can be
    // null or empty, in which case the relative URL will be interpreted as
    // absolute.
    // FIXME: If the base URL is invalid, this always creates an invalid
    // URL. Instead I think it would be better to treat all invalid base URLs
    // the same way we treat null and empty base URLs.
    KURL(const KURL& base, const String& relative);
    KURL(const KURL& base, const String& relative, const TextEncoding&);


#if USE(GOOGLEURL)
    // For conversions for other structures that have already parsed and
    // canonicalized the URL. The input must be exactly what KURL would have
    // done with the same input.
    KURL(const CString& canonicalSpec,
         const url_parse::Parsed& parsed, bool isValid);
#endif

    // NOTE(review): presumably returns the URL with the parts that must not be
    // sent in a referrer removed; confirm against the implementation.
    String strippedForUseAsReferrer() const;

    // FIXME: The above functions should be harmonized so that passing a
    // base of null or the empty string gives the same result as the
    // standard String constructor.

    // Makes a deep copy. Helpful only if you need to use a KURL on another
    // thread.  Since the underlying StringImpl objects are immutable, there's
    // no other reason to ever prefer copy() over plain old assignment.
    KURL copy() const;

    bool isNull() const;
    bool isEmpty() const;
    bool isValid() const;

    // Returns true if this URL has a path. Note that "http://foo.com/" has a
    // path of "/", so this function will return true. Only invalid or
    // non-hierarchical (like "javascript:") URLs will have no path.
    bool hasPath() const;

    // Returns true if you can set the host and port for the URL.
    // Non-hierarchical URLs don't have a host and port.
    bool canSetHostOrPort() const { return isHierarchical(); }

    // Same test as canSetHostOrPort(): true only for hierarchical URLs.
    bool canSetPathname() const { return isHierarchical(); }

    // string() returns the stored URL string by reference; urlString() wraps
    // that same string in a URLString.
#if USE(GOOGLEURL)
    const String& string() const { return m_url.string(); }
    URLString urlString() const { return URLString(m_url.string()); }
#else
    const String& string() const { return m_string; }
    URLString urlString() const { return URLString(m_string); }
#endif

    // Component accessors.
    String protocol() const;
    String host() const;
    unsigned short port() const;
    bool hasPort() const;
    String user() const;
    String pass() const;
    String path() const;
    String lastPathComponent() const;
    String query() const;
    String fragmentIdentifier() const;
    bool hasFragmentIdentifier() const;

    // Fills the supplied map from this URL's query.
    // NOTE(review): key/value decoding rules are not visible in this header.
    void copyParsedQueryTo(ParsedURLParameters&) const;

    String baseAsString() const;

    // This function is only used by location.href. It's likely we shouldn't
    // use it for that purpose, but more study is necessary before we remove it.
    String deprecatedString() const;
    String fileSystemPath() const;

    // Returns true if the current URL's protocol is the same as the null-
    // terminated ASCII argument. The argument must be lower-case.
    bool protocolIs(const char*) const;
    // Convenience wrapper for protocolIs("data").
    bool protocolIsData() const { return protocolIs("data"); }
    bool protocolInHTTPFamily() const;
    bool isLocalFile() const;

    bool setProtocol(const String&);
    void setHost(const String&);

    void removePort();
    void setPort(unsigned short);

    // Input is like "foo.com" or "foo.com:8000".
    void setHostAndPort(const String&);

    void setUser(const String&);
    void setPass(const String&);

    // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
    // will be "/".
    void setPath(const String&);

    // The query may begin with a question mark, or, if not, one will be added
    // for you. Setting the query to the empty string will leave a "?" in the
    // URL (with nothing after it). To clear the query, pass a null string.
    void setQuery(const String&);

    void setFragmentIdentifier(const String&);
    void removeFragmentIdentifier();

    // These free functions are granted access to KURL's private members.
    friend bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);

    friend bool protocolHostAndPortAreEqual(const KURL&, const KURL&);

    // NOTE(review): presumably character offsets into string() that delimit
    // the host and path; confirm against the parser.
    unsigned hostStart() const;
    unsigned hostEnd() const;

    unsigned pathStart() const;
    unsigned pathEnd() const;
    unsigned pathAfterLastSlash() const;
    // Implicit conversion yielding the same reference as string().
    operator const String&() const { return string(); }

#if USE(CF)
    KURL(CFURLRef);
    CFURLRef createCFURL() const;
#endif

#if PLATFORM(MAC)
    KURL(NSURL*);
    operator NSURL*() const;
#endif
#ifdef __OBJC__
    // Objective-C++ only: converts through string().
    operator NSString*() const { return string(); }
#endif

#if PLATFORM(QT)
    KURL(const QUrl&);
    operator QUrl() const;
#endif

#if USE(GOOGLEURL)
    // Getters for the parsed structure and its corresponding 8-bit string.
    const url_parse::Parsed& parsed() const { return m_url.m_parsed; }
    const CString& utf8String() const { return m_url.utf8String(); }
#endif

    // Only declared when NDEBUG is not defined.
#ifndef NDEBUG
    void print() const;
#endif

private:
    void invalidate();
    bool isHierarchical() const;
    static bool protocolIs(const String&, const char*);
#if USE(GOOGLEURL)
    friend class KURLGooglePrivate;
    void parse(const char* url, const String* originalString);  // KURLMac calls this.
    void copyToBuffer(Vector<char, 512>& buffer) const;  // KURLCFNet uses this.
    KURLGooglePrivate m_url;
#else  // !USE(GOOGLEURL)
    void init(const KURL&, const String&, const TextEncoding&);
    void copyToBuffer(Vector<char, 512>& buffer) const;

    // Parses the given URL. The originalString parameter allows for an
    // optimization: When the source is the same as the fixed-up string,
    // it will use the passed-in string instead of allocating a new one.
    void parse(const String&);
    void parse(const char* url, const String* originalString);

    String m_string;
    // Single-bit flags; returned by isValid() and protocolInHTTPFamily().
    bool m_isValid : 1;
    bool m_protocolInHTTPFamily : 1;

    // Position members. The inline accessors later in this header return
    // m_hostEnd, m_portEnd (as pathStart()), m_pathEnd and
    // m_pathAfterLastSlash directly.
    int m_schemeEnd;
    int m_userStart;
    int m_userEnd;
    int m_passwordEnd;
    int m_hostEnd;
    int m_portEnd;
    int m_pathAfterLastSlash;
    int m_pathEnd;
    int m_queryEnd;
    int m_fragmentEnd;
#endif
};
    266 
// Equality and inequality between KURLs, and between a KURL and a String.
// The inline definitions appear further down in this header.
bool operator==(const KURL&, const KURL&);
bool operator==(const KURL&, const String&);
bool operator==(const String&, const KURL&);
bool operator!=(const KURL&, const KURL&);
bool operator!=(const KURL&, const String&);
bool operator!=(const String&, const KURL&);

// Both of these are also declared as friends inside KURL.
bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);
bool protocolHostAndPortAreEqual(const KURL&, const KURL&);

// NOTE(review): presumably returns a shared KURL for the blank page; the
// definition is not in this header, so confirm in the implementation file.
const KURL& blankURL();
    278 
// Functions to do URL operations on strings.
// These are operations that aren't faster on a parsed URL.
// These are also different from the KURL functions in that they don't require the string to be a valid and parsable URL.
// This is especially important because valid javascript URLs are not necessarily considered valid by KURL.

// String-based counterparts of KURL::protocolIs(); they take the URL as a
// String rather than as a KURL.
bool protocolIs(const String& url, const char* protocol);
bool protocolIsJavaScript(const String& url);

bool isDefaultPortForProtocol(unsigned short port, const String& protocol);
bool portAllowed(const KURL&); // Blacklist ports that should never be used for Web resources.

bool isValidProtocol(const String&);

String mimeTypeFromDataURL(const String& url);

// Unescapes the given string using URL escaping rules, given an optional
// encoding (defaulting to UTF-8 otherwise). DANGER: If the URL has "%00"
// in it, the resulting string will have embedded null characters!
String decodeURLEscapeSequences(const String&);
String decodeURLEscapeSequences(const String&, const TextEncoding&);

// NOTE(review): presumably the escaping counterpart of
// decodeURLEscapeSequences(); the exact character set escaped is not visible here.
String encodeWithURLEscapeSequences(const String&);
    301 
    302 // Inlines.
    303 
    304 inline bool operator==(const KURL& a, const KURL& b)
    305 {
    306     return a.string() == b.string();
    307 }
    308 
    309 inline bool operator==(const KURL& a, const String& b)
    310 {
    311     return a.string() == b;
    312 }
    313 
    314 inline bool operator==(const String& a, const KURL& b)
    315 {
    316     return a == b.string();
    317 }
    318 
    319 inline bool operator!=(const KURL& a, const KURL& b)
    320 {
    321     return a.string() != b.string();
    322 }
    323 
    324 inline bool operator!=(const KURL& a, const String& b)
    325 {
    326     return a.string() != b;
    327 }
    328 
    329 inline bool operator!=(const String& a, const KURL& b)
    330 {
    331     return a != b.string();
    332 }
    333 
    334 #if !USE(GOOGLEURL)
    335 
    336 // Inline versions of some non-GoogleURL functions so we can get inlining
    337 // without having to have a lot of ugly ifdefs in the class definition.
    338 
    339 inline bool KURL::isNull() const
    340 {
    341     return m_string.isNull();
    342 }
    343 
    344 inline bool KURL::isEmpty() const
    345 {
    346     return m_string.isEmpty();
    347 }
    348 
    349 inline bool KURL::isValid() const
    350 {
    351     return m_isValid;
    352 }
    353 
    354 inline bool KURL::hasPort() const
    355 {
    356     return m_hostEnd < m_portEnd;
    357 }
    358 
    359 inline bool KURL::protocolInHTTPFamily() const
    360 {
    361     return m_protocolInHTTPFamily;
    362 }
    363 
    364 inline unsigned KURL::hostStart() const
    365 {
    366     return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
    367 }
    368 
    369 inline unsigned KURL::hostEnd() const
    370 {
    371     return m_hostEnd;
    372 }
    373 
    374 inline unsigned KURL::pathStart() const
    375 {
    376     return m_portEnd;
    377 }
    378 
    379 inline unsigned KURL::pathEnd() const
    380 {
    381     return m_pathEnd;
    382 }
    383 
    384 inline unsigned KURL::pathAfterLastSlash() const
    385 {
    386     return m_pathAfterLastSlash;
    387 }
    388 
    389 #endif  // !USE(GOOGLEURL)
    390 
    391 } // namespace WebCore
    392 
namespace WTF {

    // KURLHash is the default hash for KURL: this specialization of
    // DefaultHash maps WebCore::KURL to WebCore::KURLHash.
    template<typename T> struct DefaultHash;
    template<> struct DefaultHash<WebCore::KURL> {
        typedef WebCore::KURLHash Hash;
    };

} // namespace WTF
    402 
    403 #endif // KURL_h
    404