1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/tools/dump_cache/url_to_filename_encoder.h" 6 7 #include <string> 8 #include <vector> 9 10 #include "base/string_piece.h" 11 #include "base/string_util.h" 12 #include "base/stringprintf.h" 13 #include "testing/gtest/include/gtest/gtest.h" 14 15 using base::StringPiece; 16 using std::string; 17 18 namespace net { 19 20 #ifdef WIN32 21 char kDirSeparator = '\\'; 22 char kOtherDirSeparator = '/'; 23 #else 24 char kDirSeparator = '/'; 25 char kOtherDirSeparator = '\\'; 26 #endif 27 28 class UrlToFilenameEncoderTest : public ::testing::Test { 29 protected: 30 UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar), 31 dir_sep_(1, kDirSeparator) { 32 } 33 34 void CheckSegmentLength(const StringPiece& escaped_word) { 35 std::vector<StringPiece> components; 36 Tokenize(escaped_word, StringPiece("/"), &components); 37 for (size_t i = 0; i < components.size(); ++i) { 38 EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 39 components[i].size()); 40 } 41 } 42 43 void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) { 44 // These characters are invalid in Windows. We add in ', as that's pretty 45 // inconvenient in a Unix filename. 46 // 47 // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx 48 const string kInvalidChars = "<>:\"|?*'"; 49 for (size_t i = 0; i < escaped_word.size(); ++i) { 50 char c = escaped_word[i]; 51 EXPECT_EQ(string::npos, kInvalidChars.find(c)); 52 EXPECT_NE(invalid_slash, c); 53 EXPECT_NE('\0', c); // only invalid character in Posix 54 EXPECT_GT(0x7E, c); // only English printable characters 55 } 56 } 57 58 void Validate(const string& in_word, const string& gold_word) { 59 string escaped_word, url; 60 UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word); 61 EXPECT_EQ(gold_word, escaped_word); 62 CheckSegmentLength(escaped_word); 63 CheckValidChars(escaped_word, '\\'); 64 UrlToFilenameEncoder::Decode(escaped_word, '/', &url); 65 EXPECT_EQ(in_word, url); 66 } 67 68 void ValidateAllSegmentsSmall(const string& in_word) { 69 string escaped_word, url; 70 UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word); 71 CheckSegmentLength(escaped_word); 72 CheckValidChars(escaped_word, '\\'); 73 UrlToFilenameEncoder::Decode(escaped_word, '/', &url); 74 EXPECT_EQ(in_word, url); 75 } 76 77 void ValidateNoChange(const string& word) { 78 // We always suffix the leaf with kEscapeChar, unless the leaf is empty. 79 Validate(word, word + escape_); 80 } 81 82 void ValidateEscaped(unsigned char ch) { 83 // We always suffix the leaf with kEscapeChar, unless the leaf is empty. 84 char escaped[100]; 85 const char escape = UrlToFilenameEncoder::kEscapeChar; 86 base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape); 87 Validate(string(1, ch), escaped); 88 } 89 90 void ValidateUrl(const string& url, const string& base_path, 91 bool legacy_escape, const string& gold_filename) { 92 string encoded_filename = UrlToFilenameEncoder::Encode( 93 url, base_path, legacy_escape); 94 EXPECT_EQ(gold_filename, encoded_filename); 95 if (!legacy_escape) { 96 CheckSegmentLength(encoded_filename); 97 CheckValidChars(encoded_filename, kOtherDirSeparator); 98 string decoded_url; 99 UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator, 100 &decoded_url); 101 if (url != decoded_url) { 102 EXPECT_EQ(url, "http://" + decoded_url); 103 } 104 } 105 } 106 107 void ValidateUrlOldNew(const string& url, const string& gold_old_filename, 108 const string& gold_new_filename) { 109 ValidateUrl(url, "", true, gold_old_filename); 110 ValidateUrl(url, "", false, gold_new_filename); 111 } 112 113 void ValidateEncodeSame(const string& url1, const string& url2) { 114 string filename1 = UrlToFilenameEncoder::Encode(url1, "", false); 115 string filename2 = UrlToFilenameEncoder::Encode(url2, "", false); 116 EXPECT_EQ(filename1, filename2); 117 } 118 119 string escape_; 120 string dir_sep_; 121 }; 122 123 TEST_F(UrlToFilenameEncoderTest, DoesNotEscape) { 124 ValidateNoChange(""); 125 ValidateNoChange("abcdefg"); 126 ValidateNoChange("abcdefghijklmnopqrstuvwxyz"); 127 ValidateNoChange("ZYXWVUT"); 128 ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA"); 129 ValidateNoChange("01234567689"); 130 ValidateNoChange("_.=+-"); 131 ValidateNoChange("abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA" 132 "01234567689_.=+-"); 133 ValidateNoChange("index.html"); 134 ValidateNoChange("/"); 135 ValidateNoChange("/."); 136 ValidateNoChange("."); 137 ValidateNoChange(".."); 138 } 139 140 TEST_F(UrlToFilenameEncoderTest, Escapes) { 141 const string bad_chars = 142 "<>:\"\\|?*" // Illegal on Windows 143 "~`!$^&(){}[]';" // Bad for Unix shells 144 "^@" // Build tool doesn't like 145 "#%" // Tool doesn't like 146 ","; // The escape char has to be escaped 147 148 for (size_t i = 0; i < bad_chars.size(); ++i) { 149 ValidateEscaped(bad_chars[i]); 150 } 151 152 // Check non-printable characters. 153 ValidateEscaped('\0'); 154 for (size_t i = 127; i < 256; ++i) { 155 ValidateEscaped(static_cast<char>(i)); 156 } 157 } 158 159 TEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) { 160 Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_); 161 Validate("/./", "/" + escape_ + "./" + escape_); 162 Validate("/../", "/" + escape_ + "../" + escape_); 163 Validate("//", "/" + escape_ + "2F" + escape_); 164 Validate("/./leaf", "/" + escape_ + "./leaf" + escape_); 165 Validate("/../leaf", "/" + escape_ + "../leaf" + escape_); 166 Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_); 167 Validate("mysite/u?param1=x¶m2=y", 168 "mysite/u" + escape_ + "3Fparam1=x" + escape_ + "26param2=y" + 169 escape_); 170 Validate("search?q=dogs&go=&form=QBLH&qs=n", // from Latency Labs bing test. 171 "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ + 172 "26form=QBLH" + escape_ + "26qs=n" + escape_); 173 Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true", 174 "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ + 175 "3Fid=138" + escape_ + "26content=true" + escape_); 176 } 177 178 TEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) { 179 ValidateUrlOldNew("http://www.google.com/index.html", 180 "www.google.com" + dir_sep_ + "indexx2Ehtml", 181 "www.google.com" + dir_sep_ + "index.html" + escape_); 182 ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=", 183 "www.google.com" + dir_sep_ + "x" + dir_sep_ + 184 "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D", 185 186 "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" + 187 escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ + 188 "26oq=" + escape_); 189 ValidateUrlOldNew("http://www.foo.com/a//", 190 "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml", 191 "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" + 192 escape_); 193 194 // From bug: Double slash preserved. 195 ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html", 196 "", false, 197 "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" + 198 escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" + 199 dir_sep_ + "index.html" + escape_); 200 ValidateUrlOldNew( 201 "http://blogutils.net/olct/online.php?" 202 "site=http://thelwordfanfics.blogspot.&interval=600", 203 204 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "onlinex2Ephpx3F" 205 "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600", 206 207 "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ + 208 "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ + 209 "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_); 210 } 211 212 // From bug: Escapes treated the same as normal char. 213 TEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) { 214 for (int i = 0; i < 128; ++i) { 215 string unescaped(1, static_cast<char>(i)); 216 string escaped = base::StringPrintf("%%%02X", i); 217 ValidateEncodeSame(unescaped, escaped); 218 } 219 220 ValidateEncodeSame( 221 "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot" 222 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch", 223 224 "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot" 225 "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch"); 226 } 227 228 // From bug: Filename encoding is not prefix-free. 229 TEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) { 230 Validate("/", "/" + escape_); 231 Validate("//", "/" + escape_ + "2F" + escape_); 232 Validate("///", "/" + escape_ + "2F" + "/" + escape_); 233 } 234 235 TEST_F(UrlToFilenameEncoderTest, LongTail) { 236 static char long_word[] = 237 "~joebob/briggs/12345678901234567890123456789012345678901234567890" 238 "1234567890123456789012345678901234567890123456789012345678901234567890" 239 "1234567890123456789012345678901234567890123456789012345678901234567890" 240 "1234567890123456789012345678901234567890123456789012345678901234567890" 241 "1234567890123456789012345678901234567890123456789012345678901234567890" 242 "1234567890123456789012345678901234567890123456789012345678901234567890"; 243 244 // the long lines in the string below are 64 characters, so we can see 245 // the slashes every 128. 246 string gold_long_word = 247 escape_ + "7Ejoebob/briggs/" 248 "1234567890123456789012345678901234567890123456789012345678901234" 249 "56789012345678901234567890123456789012345678901234567890123456" + 250 escape_ + "-/" 251 "7890123456789012345678901234567890123456789012345678901234567890" 252 "12345678901234567890123456789012345678901234567890123456789012" + 253 escape_ + "-/" 254 "3456789012345678901234567890123456789012345678901234567890123456" 255 "78901234567890123456789012345678901234567890123456789012345678" + 256 escape_ + "-/" 257 "9012345678901234567890" + escape_; 258 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 259 sizeof(long_word)); 260 Validate(long_word, gold_long_word); 261 } 262 263 TEST_F(UrlToFilenameEncoderTest, LongTailQuestion) { 264 // Here the '?' in the last path segment expands to @3F, making 265 // it hit 128 chars before the input segment gets that big. 266 static char long_word[] = 267 "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?" 268 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 269 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 270 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 271 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 272 "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"; 273 274 // Notice that at the end of the third segment, we avoid splitting 275 // the (escape_ + "3F") that was generated from the "?", so that segment is 276 // only 127 characters. 277 string pattern = "1234567" + escape_ + "3F"; // 10 characters 278 string gold_long_word = 279 escape_ + "7Ejoebob/briggs/" + 280 pattern + pattern + pattern + pattern + pattern + pattern + "1234" 281 "567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + 282 "123456" + escape_ + "-/" 283 "7" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + 284 pattern + pattern + pattern + pattern + pattern + pattern + pattern + 285 "12" + 286 escape_ + "-/" 287 "34567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern 288 + "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern 289 + pattern + "1234567" + 290 escape_ + "-/" + 291 escape_ + "3F" + pattern + pattern + escape_; 292 EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 293 sizeof(long_word)); 294 Validate(long_word, gold_long_word); 295 } 296 297 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) { 298 // hit corner cases, +/- 4 characters from kMaxLen 299 for (int i = -4; i <= 4; ++i) { 300 string input; 301 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x'); 302 ValidateAllSegmentsSmall(input); 303 } 304 } 305 306 TEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) { 307 // hit corner cases, +/- 4 characters from kMaxLen. This time we 308 // leave off the last 'x' and put in a '.', which ensures that we 309 // are truncating with '/' *after* the expansion. 310 for (int i = -4; i <= 4; ++i) { 311 string input; 312 input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x'); 313 input.append(1, '.'); // this will expand to 3 characters. 314 ValidateAllSegmentsSmall(input); 315 } 316 } 317 318 TEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) { 319 Validate("/a/b/c", "/a/b/c" + escape_); // c is leaf file "c," 320 Validate("/a/b/c/d", "/a/b/c/d" + escape_); // c is directory "c" 321 Validate("/a/b/c/d/", "/a/b/c/d/" + escape_); 322 } 323 324 325 TEST_F(UrlToFilenameEncoderTest, BackslashSeparator) { 326 string long_word; 327 string escaped_word; 328 long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x'); 329 UrlToFilenameEncoder::EncodeSegment("", long_word, '\\', &escaped_word); 330 331 // check that one backslash, plus the escape ",-", and the ending , got added. 332 EXPECT_EQ(long_word.size() + 4, escaped_word.size()); 333 ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 334 escaped_word.size()); 335 // Check that the backslash got inserted at the correct spot. 336 EXPECT_EQ('\\', escaped_word[ 337 UrlToFilenameEncoder::kMaximumSubdirectoryLength]); 338 } 339 340 } // namespace net 341 342