Home | History | Annotate | Download | only in platform
      1 /*
      2  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef KURL_h
     27 #define KURL_h
     28 
     29 #include "PlatformString.h"
     30 
     31 #if PLATFORM(CF)
     32 typedef const struct __CFURL* CFURLRef;
     33 #endif
     34 
     35 #if PLATFORM(MAC)
     36 #ifdef __OBJC__
     37 @class NSURL;
     38 #else
     39 class NSURL;
     40 #endif
     41 #endif
     42 
     43 #if PLATFORM(QT)
     44 QT_BEGIN_NAMESPACE
     45 class QUrl;
     46 QT_END_NAMESPACE
     47 #endif
     48 
     49 #if USE(GOOGLEURL)
     50 #include "KURLGooglePrivate.h"
     51 #endif
     52 
     53 #if USE(JSC)
     54 #include <runtime/UString.h>
     55 #endif
     56 
     57 namespace WebCore {
     58 
     59 class TextEncoding;
     60 struct KURLHash;
     61 
     62 enum ParsedURLStringTag { ParsedURLString };
     63 
     64 class KURL {
     65 public:
     66     // Generates a URL which contains a null string.
     67     KURL() { invalidate(); }
     68 
     69     // The argument is an absolute URL string. The string is assumed to be output of KURL::string() called on a valid
     70     // KURL object, or indiscernible from such.
     71     // It is usually best to avoid repeatedly parsing a string, unless memory saving outweigh the possible slow-downs.
     72     KURL(ParsedURLStringTag, const char*);
     73     KURL(ParsedURLStringTag, const String&);
     74 
     75     // Resolves the relative URL with the given base URL. If provided, the
     76     // TextEncoding is used to encode non-ASCII characers. The base URL can be
     77     // null or empty, in which case the relative URL will be interpreted as
     78     // absolute.
     79     // FIXME: If the base URL is invalid, this always creates an invalid
     80     // URL. Instead I think it would be better to treat all invalid base URLs
     81     // the same way we treate null and empty base URLs.
     82     KURL(const KURL& base, const String& relative);
     83     KURL(const KURL& base, const String& relative, const TextEncoding&);
     84 
     85 
     86 #if USE(GOOGLEURL)
     87     // For conversions for other structures that have already parsed and
     88     // canonicalized the URL. The input must be exactly what KURL would have
     89     // done with the same input.
     90     KURL(const CString& canonicalSpec,
     91          const url_parse::Parsed& parsed, bool isValid);
     92 #endif
     93 
     94     // FIXME: The above functions should be harmonized so that passing a
     95     // base of null or the empty string gives the same result as the
     96     // standard String constructor.
     97 
     98     // Makes a deep copy. Helpful only if you need to use a KURL on another
     99     // thread.  Since the underlying StringImpl objects are immutable, there's
    100     // no other reason to ever prefer copy() over plain old assignment.
    101     KURL copy() const;
    102 
    103     bool isNull() const;
    104     bool isEmpty() const;
    105     bool isValid() const;
    106 
    107     // Returns true if this URL has a path. Note that "http://foo.com/" has a
    108     // path of "/", so this function will return true. Only invalid or
    109     // non-hierarchical (like "javascript:") URLs will have no path.
    110     bool hasPath() const;
    111 
    112     // Returns true if you can set the host and port for the URL.
    113     // Non-hierarchical URLs don't have a host and port.
    114     bool canSetHostOrPort() const { return isHierarchical(); }
    115 
    116     bool canSetPathname() const { return isHierarchical(); }
    117 
    118 #if USE(GOOGLEURL)
    119     const String& string() const { return m_url.string(); }
    120 #else
    121     const String& string() const { return m_string; }
    122 #endif
    123 
    124     String protocol() const;
    125     String host() const;
    126     unsigned short port() const;
    127     bool hasPort() const;
    128     String user() const;
    129     String pass() const;
    130     String path() const;
    131     String lastPathComponent() const;
    132     String query() const;
    133     String fragmentIdentifier() const;
    134     bool hasFragmentIdentifier() const;
    135 
    136     String baseAsString() const;
    137 
    138     String prettyURL() const;
    139     String fileSystemPath() const;
    140 
    141     // Returns true if the current URL's protocol is the same as the null-
    142     // terminated ASCII argument. The argument must be lower-case.
    143     bool protocolIs(const char*) const;
    144     bool protocolInHTTPFamily() const;
    145     bool isLocalFile() const;
    146 
    147     bool setProtocol(const String&);
    148     void setHost(const String&);
    149 
    150     void removePort();
    151     void setPort(unsigned short);
    152 
    153     // Input is like "foo.com" or "foo.com:8000".
    154     void setHostAndPort(const String&);
    155 
    156     void setUser(const String&);
    157     void setPass(const String&);
    158 
    159     // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
    160     // will be "/".
    161     void setPath(const String&);
    162 
    163     // The query may begin with a question mark, or, if not, one will be added
    164     // for you. Setting the query to the empty string will leave a "?" in the
    165     // URL (with nothing after it). To clear the query, pass a null string.
    166     void setQuery(const String&);
    167 
    168     void setFragmentIdentifier(const String&);
    169     void removeFragmentIdentifier();
    170 
    171     friend bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);
    172 
    173     friend bool protocolHostAndPortAreEqual(const KURL&, const KURL&);
    174 
    175     unsigned hostStart() const;
    176     unsigned hostEnd() const;
    177 
    178     unsigned pathStart() const;
    179     unsigned pathEnd() const;
    180     unsigned pathAfterLastSlash() const;
    181     operator const String&() const { return string(); }
    182 #if USE(JSC)
    183     operator JSC::UString() const { return string(); }
    184 #endif
    185 
    186 #if PLATFORM(CF)
    187     KURL(CFURLRef);
    188     CFURLRef createCFURL() const;
    189 #endif
    190 
    191 #if PLATFORM(MAC)
    192     KURL(NSURL*);
    193     operator NSURL*() const;
    194 #endif
    195 #ifdef __OBJC__
    196     operator NSString*() const { return string(); }
    197 #endif
    198 
    199 #if PLATFORM(QT)
    200     KURL(const QUrl&);
    201     operator QUrl() const;
    202 #endif
    203 
    204 #if USE(GOOGLEURL)
    205     // Getters for the parsed structure and its corresponding 8-bit string.
    206     const url_parse::Parsed& parsed() const { return m_url.m_parsed; }
    207     const CString& utf8String() const { return m_url.utf8String(); }
    208 #endif
    209 
    210 #ifndef NDEBUG
    211     void print() const;
    212 #endif
    213 
    214 private:
    215     void invalidate();
    216     bool isHierarchical() const;
    217     static bool protocolIs(const String&, const char*);
    218 #if USE(GOOGLEURL)
    219     friend class KURLGooglePrivate;
    220     void parse(const char* url, const String* originalString);  // KURLMac calls this.
    221     void copyToBuffer(Vector<char, 512>& buffer) const;  // KURLCFNet uses this.
    222     KURLGooglePrivate m_url;
    223 #else  // !USE(GOOGLEURL)
    224     void init(const KURL&, const String&, const TextEncoding&);
    225     void copyToBuffer(Vector<char, 512>& buffer) const;
    226 
    227     // Parses the given URL. The originalString parameter allows for an
    228     // optimization: When the source is the same as the fixed-up string,
    229     // it will use the passed-in string instead of allocating a new one.
    230     void parse(const String&);
    231     void parse(const char* url, const String* originalString);
    232 
    233     String m_string;
    234     bool m_isValid : 1;
    235     bool m_protocolInHTTPFamily : 1;
    236 
    237     int m_schemeEnd;
    238     int m_userStart;
    239     int m_userEnd;
    240     int m_passwordEnd;
    241     int m_hostEnd;
    242     int m_portEnd;
    243     int m_pathAfterLastSlash;
    244     int m_pathEnd;
    245     int m_queryEnd;
    246     int m_fragmentEnd;
    247 #endif
    248 };
    249 
    250 bool operator==(const KURL&, const KURL&);
    251 bool operator==(const KURL&, const String&);
    252 bool operator==(const String&, const KURL&);
    253 bool operator!=(const KURL&, const KURL&);
    254 bool operator!=(const KURL&, const String&);
    255 bool operator!=(const String&, const KURL&);
    256 
    257 bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);
    258 bool protocolHostAndPortAreEqual(const KURL&, const KURL&);
    259 
    260 const KURL& blankURL();
    261 
    262 // Functions to do URL operations on strings.
    263 // These are operations that aren't faster on a parsed URL.
    264 // These are also different from the KURL functions in that they don't require the string to be a valid and parsable URL.
    265 // This is especially important because valid javascript URLs are not necessarily considered valid by KURL.
    266 
    267 bool protocolIs(const String& url, const char* protocol);
    268 bool protocolIsJavaScript(const String& url);
    269 
    270 bool isDefaultPortForProtocol(unsigned short port, const String& protocol);
    271 bool portAllowed(const KURL&); // Blacklist ports that should never be used for Web resources.
    272 
    273 String mimeTypeFromDataURL(const String& url);
    274 
    275 // Unescapes the given string using URL escaping rules, given an optional
    276 // encoding (defaulting to UTF-8 otherwise). DANGER: If the URL has "%00"
    277 // in it, the resulting string will have embedded null characters!
    278 String decodeURLEscapeSequences(const String&);
    279 String decodeURLEscapeSequences(const String&, const TextEncoding&);
    280 
    281 String encodeWithURLEscapeSequences(const String&);
    282 
    283 // Inlines.
    284 
    285 inline bool operator==(const KURL& a, const KURL& b)
    286 {
    287     return a.string() == b.string();
    288 }
    289 
    290 inline bool operator==(const KURL& a, const String& b)
    291 {
    292     return a.string() == b;
    293 }
    294 
    295 inline bool operator==(const String& a, const KURL& b)
    296 {
    297     return a == b.string();
    298 }
    299 
    300 inline bool operator!=(const KURL& a, const KURL& b)
    301 {
    302     return a.string() != b.string();
    303 }
    304 
    305 inline bool operator!=(const KURL& a, const String& b)
    306 {
    307     return a.string() != b;
    308 }
    309 
    310 inline bool operator!=(const String& a, const KURL& b)
    311 {
    312     return a != b.string();
    313 }
    314 
    315 #if !USE(GOOGLEURL)
    316 
    317 // Inline versions of some non-GoogleURL functions so we can get inlining
    318 // without having to have a lot of ugly ifdefs in the class definition.
    319 
    320 inline bool KURL::isNull() const
    321 {
    322     return m_string.isNull();
    323 }
    324 
    325 inline bool KURL::isEmpty() const
    326 {
    327     return m_string.isEmpty();
    328 }
    329 
    330 inline bool KURL::isValid() const
    331 {
    332     return m_isValid;
    333 }
    334 
    335 inline bool KURL::hasPort() const
    336 {
    337     return m_hostEnd < m_portEnd;
    338 }
    339 
    340 inline bool KURL::protocolInHTTPFamily() const
    341 {
    342     return m_protocolInHTTPFamily;
    343 }
    344 
    345 inline unsigned KURL::hostStart() const
    346 {
    347     return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
    348 }
    349 
    350 inline unsigned KURL::hostEnd() const
    351 {
    352     return m_hostEnd;
    353 }
    354 
    355 inline unsigned KURL::pathStart() const
    356 {
    357     return m_portEnd;
    358 }
    359 
    360 inline unsigned KURL::pathEnd() const
    361 {
    362     return m_pathEnd;
    363 }
    364 
    365 inline unsigned KURL::pathAfterLastSlash() const
    366 {
    367     return m_pathAfterLastSlash;
    368 }
    369 
    370 #endif  // !USE(GOOGLEURL)
    371 
    372 } // namespace WebCore
    373 
    374 namespace WTF {
    375 
    376     // KURLHash is the default hash for String
    377     template<typename T> struct DefaultHash;
    378     template<> struct DefaultHash<WebCore::KURL> {
    379         typedef WebCore::KURLHash Hash;
    380     };
    381 
    382 } // namespace WTF
    383 
    384 #endif // KURL_h
    385