1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef URL_URL_UTIL_H_ 6 #define URL_URL_UTIL_H_ 7 8 #include <string> 9 10 #include "base/strings/string16.h" 11 #include "url/url_canon.h" 12 #include "url/url_export.h" 13 #include "url/url_parse.h" 14 15 namespace url_util { 16 17 // Init ------------------------------------------------------------------------ 18 19 // Initialization is NOT required, it will be implicitly initialized when first 20 // used. However, this implicit initialization is NOT threadsafe. If you are 21 // using this library in a threaded environment and don't have a consistent 22 // "first call" (an example might be calling "AddStandardScheme" with your 23 // special application-specific schemes) then you will want to call initialize 24 // before spawning any threads. 25 // 26 // It is OK to call this function more than once, subsequent calls will simply 27 // "noop", unless Shutdown() was called in the mean time. This will also be a 28 // "noop" if other calls to the library have forced an initialization 29 // beforehand. 30 URL_EXPORT void Initialize(); 31 32 // Cleanup is not required, except some strings may leak. For most user 33 // applications, this is fine. If you're using it in a library that may get 34 // loaded and unloaded, you'll want to unload to properly clean up your 35 // library. 36 URL_EXPORT void Shutdown(); 37 38 // Schemes -------------------------------------------------------------------- 39 40 // Adds an application-defined scheme to the internal list of "standard" URL 41 // schemes. This function is not threadsafe and can not be called concurrently 42 // with any other url_util function. It will assert if the list of standard 43 // schemes has been locked (see LockStandardSchemes). 44 URL_EXPORT void AddStandardScheme(const char* new_scheme); 45 46 // Sets a flag to prevent future calls to AddStandardScheme from succeeding. 47 // 48 // This is designed to help prevent errors for multithreaded applications. 49 // Normal usage would be to call AddStandardScheme for your custom schemes at 50 // the beginning of program initialization, and then LockStandardSchemes. This 51 // prevents future callers from mistakenly calling AddStandardScheme when the 52 // program is running with multiple threads, where such usage would be 53 // dangerous. 54 // 55 // We could have had AddStandardScheme use a lock instead, but that would add 56 // some platform-specific dependencies we don't otherwise have now, and is 57 // overkill considering the normal usage is so simple. 58 URL_EXPORT void LockStandardSchemes(); 59 60 // Locates the scheme in the given string and places it into |found_scheme|, 61 // which may be NULL to indicate the caller does not care about the range. 62 // 63 // Returns whether the given |compare| scheme matches the scheme found in the 64 // input (if any). The |compare| scheme must be a valid canonical scheme or 65 // the result of the comparison is undefined. 66 URL_EXPORT bool FindAndCompareScheme(const char* str, 67 int str_len, 68 const char* compare, 69 url_parse::Component* found_scheme); 70 URL_EXPORT bool FindAndCompareScheme(const base::char16* str, 71 int str_len, 72 const char* compare, 73 url_parse::Component* found_scheme); 74 inline bool FindAndCompareScheme(const std::string& str, 75 const char* compare, 76 url_parse::Component* found_scheme) { 77 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), 78 compare, found_scheme); 79 } 80 inline bool FindAndCompareScheme(const base::string16& str, 81 const char* compare, 82 url_parse::Component* found_scheme) { 83 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), 84 compare, found_scheme); 85 } 86 87 // Returns true if the given string represents a standard URL. This means that 88 // either the scheme is in the list of known standard schemes. 89 URL_EXPORT bool IsStandard(const char* spec, 90 const url_parse::Component& scheme); 91 URL_EXPORT bool IsStandard(const base::char16* spec, 92 const url_parse::Component& scheme); 93 94 // TODO(brettw) remove this. This is a temporary compatibility hack to avoid 95 // breaking the WebKit build when this version is synced via Chrome. 96 inline bool IsStandard(const char* spec, int spec_len, 97 const url_parse::Component& scheme) { 98 return IsStandard(spec, scheme); 99 } 100 101 // URL library wrappers ------------------------------------------------------- 102 103 // Parses the given spec according to the extracted scheme type. Normal users 104 // should use the URL object, although this may be useful if performance is 105 // critical and you don't want to do the heap allocation for the std::string. 106 // 107 // As with the url_canon::Canonicalize* functions, the charset converter can 108 // be NULL to use UTF-8 (it will be faster in this case). 109 // 110 // Returns true if a valid URL was produced, false if not. On failure, the 111 // output and parsed structures will still be filled and will be consistent, 112 // but they will not represent a loadable URL. 113 URL_EXPORT bool Canonicalize(const char* spec, 114 int spec_len, 115 url_canon::CharsetConverter* charset_converter, 116 url_canon::CanonOutput* output, 117 url_parse::Parsed* output_parsed); 118 URL_EXPORT bool Canonicalize(const base::char16* spec, 119 int spec_len, 120 url_canon::CharsetConverter* charset_converter, 121 url_canon::CanonOutput* output, 122 url_parse::Parsed* output_parsed); 123 124 // Resolves a potentially relative URL relative to the given parsed base URL. 125 // The base MUST be valid. The resulting canonical URL and parsed information 126 // will be placed in to the given out variables. 127 // 128 // The relative need not be relative. If we discover that it's absolute, this 129 // will produce a canonical version of that URL. See Canonicalize() for more 130 // about the charset_converter. 131 // 132 // Returns true if the output is valid, false if the input could not produce 133 // a valid URL. 134 URL_EXPORT bool ResolveRelative(const char* base_spec, 135 int base_spec_len, 136 const url_parse::Parsed& base_parsed, 137 const char* relative, 138 int relative_length, 139 url_canon::CharsetConverter* charset_converter, 140 url_canon::CanonOutput* output, 141 url_parse::Parsed* output_parsed); 142 URL_EXPORT bool ResolveRelative(const char* base_spec, 143 int base_spec_len, 144 const url_parse::Parsed& base_parsed, 145 const base::char16* relative, 146 int relative_length, 147 url_canon::CharsetConverter* charset_converter, 148 url_canon::CanonOutput* output, 149 url_parse::Parsed* output_parsed); 150 151 // Replaces components in the given VALID input url. The new canonical URL info 152 // is written to output and out_parsed. 153 // 154 // Returns true if the resulting URL is valid. 155 URL_EXPORT bool ReplaceComponents( 156 const char* spec, 157 int spec_len, 158 const url_parse::Parsed& parsed, 159 const url_canon::Replacements<char>& replacements, 160 url_canon::CharsetConverter* charset_converter, 161 url_canon::CanonOutput* output, 162 url_parse::Parsed* out_parsed); 163 URL_EXPORT bool ReplaceComponents( 164 const char* spec, 165 int spec_len, 166 const url_parse::Parsed& parsed, 167 const url_canon::Replacements<base::char16>& replacements, 168 url_canon::CharsetConverter* charset_converter, 169 url_canon::CanonOutput* output, 170 url_parse::Parsed* out_parsed); 171 172 // String helper functions ---------------------------------------------------- 173 174 // Compare the lower-case form of the given string against the given ASCII 175 // string. This is useful for doing checking if an input string matches some 176 // token, and it is optimized to avoid intermediate string copies. 177 // 178 // The versions of this function that don't take a b_end assume that the b 179 // string is NULL terminated. 180 URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 181 const char* a_end, 182 const char* b); 183 URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 184 const char* a_end, 185 const char* b_begin, 186 const char* b_end); 187 URL_EXPORT bool LowerCaseEqualsASCII(const base::char16* a_begin, 188 const base::char16* a_end, 189 const char* b); 190 191 // Unescapes the given string using URL escaping rules. 192 URL_EXPORT void DecodeURLEscapeSequences(const char* input, int length, 193 url_canon::CanonOutputW* output); 194 195 // Escapes the given string as defined by the JS method encodeURIComponent. See 196 // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent 197 URL_EXPORT void EncodeURIComponent(const char* input, int length, 198 url_canon::CanonOutput* output); 199 200 201 } // namespace url_util 202 203 #endif // URL_URL_UTIL_H_ 204