// Copyright 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 30 #ifndef GOOGLEURL_SRC_URL_UTIL_H__ 31 #define GOOGLEURL_SRC_URL_UTIL_H__ 32 33 #include <string> 34 35 #include "base/string16.h" 36 #include "googleurl/src/url_parse.h" 37 #include "googleurl/src/url_canon.h" 38 39 namespace url_util { 40 41 // Schemes -------------------------------------------------------------------- 42 43 // Adds an application-defined scheme to the internal list of "standard" URL 44 // schemes. 45 void AddStandardScheme(const char* new_scheme); 46 47 // Locates the scheme in the given string and places it into |found_scheme|, 48 // which may be NULL to indicate the caller does not care about the range. 49 // Returns whether the given |compare| scheme matches the scheme found in the 50 // input (if any). 51 bool FindAndCompareScheme(const char* str, 52 int str_len, 53 const char* compare, 54 url_parse::Component* found_scheme); 55 bool FindAndCompareScheme(const char16* str, 56 int str_len, 57 const char* compare, 58 url_parse::Component* found_scheme); 59 inline bool FindAndCompareScheme(const std::string& str, 60 const char* compare, 61 url_parse::Component* found_scheme) { 62 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), 63 compare, found_scheme); 64 } 65 inline bool FindAndCompareScheme(const string16& str, 66 const char* compare, 67 url_parse::Component* found_scheme) { 68 return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), 69 compare, found_scheme); 70 } 71 72 // Returns true if the given string represents a standard URL. This means that 73 // either the scheme is in the list of known standard schemes, or there is a 74 // "://" following the scheme. 75 bool IsStandard(const char* spec, int spec_len, 76 const url_parse::Component& scheme); 77 bool IsStandard(const char16* spec, int spec_len, 78 const url_parse::Component& scheme); 79 80 // URL library wrappers ------------------------------------------------------- 81 82 // Parses the given spec according to the extracted scheme type. 
Normal users 83 // should use the URL object, although this may be useful if performance is 84 // critical and you don't want to do the heap allocation for the std::string. 85 // 86 // As with the url_canon::Canonicalize* functions, the charset converter can 87 // be NULL to use UTF-8 (it will be faster in this case). 88 // 89 // Returns true if a valid URL was produced, false if not. On failure, the 90 // output and parsed structures will still be filled and will be consistent, 91 // but they will not represent a loadable URL. 92 bool Canonicalize(const char* spec, 93 int spec_len, 94 url_canon::CharsetConverter* charset_converter, 95 url_canon::CanonOutput* output, 96 url_parse::Parsed* output_parsed); 97 bool Canonicalize(const char16* spec, 98 int spec_len, 99 url_canon::CharsetConverter* charset_converter, 100 url_canon::CanonOutput* output, 101 url_parse::Parsed* output_parsed); 102 103 // Resolves a potentially relative URL relative to the given parsed base URL. 104 // The base MUST be valid. The resulting canonical URL and parsed information 105 // will be placed in to the given out variables. 106 // 107 // The relative need not be relative. If we discover that it's absolute, this 108 // will produce a canonical version of that URL. See Canonicalize() for more 109 // about the charset_converter. 110 // 111 // Returns true if the output is valid, false if the input could not produce 112 // a valid URL. 
113 bool ResolveRelative(const char* base_spec, 114 int base_spec_len, 115 const url_parse::Parsed& base_parsed, 116 const char* relative, 117 int relative_length, 118 url_canon::CharsetConverter* charset_converter, 119 url_canon::CanonOutput* output, 120 url_parse::Parsed* output_parsed); 121 bool ResolveRelative(const char* base_spec, 122 int base_spec_len, 123 const url_parse::Parsed& base_parsed, 124 const char16* relative, 125 int relative_length, 126 url_canon::CharsetConverter* charset_converter, 127 url_canon::CanonOutput* output, 128 url_parse::Parsed* output_parsed); 129 130 // Replaces components in the given VALID input url. The new canonical URL info 131 // is written to output and out_parsed. 132 // 133 // Returns true if the resulting URL is valid. 134 bool ReplaceComponents(const char* spec, 135 int spec_len, 136 const url_parse::Parsed& parsed, 137 const url_canon::Replacements<char>& replacements, 138 url_canon::CharsetConverter* charset_converter, 139 url_canon::CanonOutput* output, 140 url_parse::Parsed* out_parsed); 141 bool ReplaceComponents(const char* spec, 142 int spec_len, 143 const url_parse::Parsed& parsed, 144 const url_canon::Replacements<char16>& replacements, 145 url_canon::CharsetConverter* charset_converter, 146 url_canon::CanonOutput* output, 147 url_parse::Parsed* out_parsed); 148 149 // String helper functions ---------------------------------------------------- 150 151 // Compare the lower-case form of the given string against the given ASCII 152 // string. This is useful for doing checking if an input string matches some 153 // token, and it is optimized to avoid intermediate string copies. 154 // 155 // The versions of this function that don't take a b_end assume that the b 156 // string is NULL terminated. 
157 bool LowerCaseEqualsASCII(const char* a_begin, 158 const char* a_end, 159 const char* b); 160 bool LowerCaseEqualsASCII(const char* a_begin, 161 const char* a_end, 162 const char* b_begin, 163 const char* b_end); 164 bool LowerCaseEqualsASCII(const char16* a_begin, 165 const char16* a_end, 166 const char* b); 167 168 } // namespace url_util 169 170 #endif // GOOGLEURL_SRC_URL_UTIL_H__ 171