1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/tools/dump_cache/url_utilities.h" 6 7 #include "base/logging.h" 8 #include "base/string_number_conversions.h" 9 #include "base/string_util.h" 10 11 namespace net { 12 13 std::string UrlUtilities::GetUrlHost(const std::string& url) { 14 size_t b = url.find("//"); 15 if (b == std::string::npos) 16 b = 0; 17 else 18 b += 2; 19 size_t next_slash = url.find_first_of('/', b); 20 size_t next_colon = url.find_first_of(':', b); 21 if (next_slash != std::string::npos 22 && next_colon != std::string::npos 23 && next_colon < next_slash) { 24 return std::string(url, b, next_colon - b); 25 } 26 if (next_slash == std::string::npos) { 27 if (next_colon != std::string::npos) { 28 return std::string(url, b, next_colon - b); 29 } else { 30 next_slash = url.size(); 31 } 32 } 33 return std::string(url, b, next_slash - b); 34 } 35 36 std::string UrlUtilities::GetUrlHostPath(const std::string& url) { 37 size_t b = url.find("//"); 38 if (b == std::string::npos) 39 b = 0; 40 else 41 b += 2; 42 return std::string(url, b); 43 } 44 45 std::string UrlUtilities::GetUrlPath(const std::string& url) { 46 size_t b = url.find("//"); 47 if (b == std::string::npos) 48 b = 0; 49 else 50 b += 2; 51 b = url.find("/", b); 52 if (b == std::string::npos) 53 return "/"; 54 55 size_t e = url.find("#", b+1); 56 if (e != std::string::npos) 57 return std::string(url, b, (e - b)); 58 return std::string(url, b); 59 } 60 61 namespace { 62 63 // Parsing states for UrlUtilities::Unescape 64 enum UnescapeState { 65 NORMAL, // We are not in the middle of parsing an escape. 66 ESCAPE1, // We just parsed % . 67 ESCAPE2 // We just parsed %X for some hex digit X. 68 }; 69 70 } // namespace 71 72 std::string UrlUtilities::Unescape(const std::string& escaped_url) { 73 std::string unescaped_url, escape_text; 74 int escape_value; 75 UnescapeState state = NORMAL; 76 std::string::const_iterator iter = escaped_url.begin(); 77 while (iter < escaped_url.end()) { 78 char c = *iter; 79 switch (state) { 80 case NORMAL: 81 if (c == '%') { 82 escape_text.clear(); 83 state = ESCAPE1; 84 } else { 85 unescaped_url.push_back(c); 86 } 87 ++iter; 88 break; 89 case ESCAPE1: 90 if (IsHexDigit(c)) { 91 escape_text.push_back(c); 92 state = ESCAPE2; 93 ++iter; 94 } else { 95 // Unexpected, % followed by non-hex chars, pass it through. 96 unescaped_url.push_back('%'); 97 state = NORMAL; 98 } 99 break; 100 case ESCAPE2: 101 if (IsHexDigit(c)) { 102 escape_text.push_back(c); 103 bool ok = base::HexStringToInt(escape_text, &escape_value); 104 DCHECK(ok); 105 unescaped_url.push_back(static_cast<unsigned char>(escape_value)); 106 state = NORMAL; 107 ++iter; 108 } else { 109 // Unexpected, % followed by non-hex chars, pass it through. 110 unescaped_url.push_back('%'); 111 unescaped_url.append(escape_text); 112 state = NORMAL; 113 } 114 break; 115 } 116 } 117 // Unexpected, % followed by end of string, pass it through. 118 if (state == ESCAPE1 || state == ESCAPE2) { 119 unescaped_url.push_back('%'); 120 unescaped_url.append(escape_text); 121 } 122 return unescaped_url; 123 } 124 125 } // namespace net 126 127