1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <algorithm> 6 7 #include "base/string_util.h" 8 #include "crypto/sha2.h" 9 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 10 #include "googleurl/src/gurl.h" 11 #include "testing/gtest/include/gtest/gtest.h" 12 13 namespace { 14 15 bool VectorContains(const std::vector<std::string>& data, 16 const std::string& str) { 17 return std::find(data.begin(), data.end(), str) != data.end(); 18 } 19 20 } 21 22 // Tests that we generate the required host/path combinations for testing 23 // according to the Safe Browsing spec. 24 // See section 6.2 in 25 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 26 TEST(SafeBrowsingUtilTest, UrlParsing) { 27 std::vector<std::string> hosts, paths; 28 29 GURL url("http://a.b.c/1/2.html?param=1"); 30 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 31 safe_browsing_util::GeneratePathsToCheck(url, &paths); 32 EXPECT_EQ(hosts.size(), static_cast<size_t>(2)); 33 EXPECT_EQ(paths.size(), static_cast<size_t>(4)); 34 EXPECT_EQ(hosts[0], "b.c"); 35 EXPECT_EQ(hosts[1], "a.b.c"); 36 37 EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1")); 38 EXPECT_TRUE(VectorContains(paths, "/1/2.html")); 39 EXPECT_TRUE(VectorContains(paths, "/1/")); 40 EXPECT_TRUE(VectorContains(paths, "/")); 41 42 url = GURL("http://a.b.c.d.e.f.g/1.html"); 43 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 44 safe_browsing_util::GeneratePathsToCheck(url, &paths); 45 EXPECT_EQ(hosts.size(), static_cast<size_t>(5)); 46 EXPECT_EQ(paths.size(), static_cast<size_t>(2)); 47 EXPECT_EQ(hosts[0], "f.g"); 48 EXPECT_EQ(hosts[1], "e.f.g"); 49 EXPECT_EQ(hosts[2], "d.e.f.g"); 50 EXPECT_EQ(hosts[3], "c.d.e.f.g"); 51 EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g"); 52 EXPECT_TRUE(VectorContains(paths, "/1.html")); 53 EXPECT_TRUE(VectorContains(paths, "/")); 54 55 url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/"); 56 safe_browsing_util::GeneratePathsToCheck(url, &paths); 57 EXPECT_EQ(paths.size(), static_cast<size_t>(3)); 58 EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/")); 59 EXPECT_TRUE(VectorContains(paths, "/saw-cgi/")); 60 EXPECT_TRUE(VectorContains(paths, "/")); 61 } 62 63 // Tests the url canonicalization according to the Safe Browsing spec. 64 // See section 6.1 in 65 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 66 TEST(SafeBrowsingUtilTest, CanonicalizeUrl) { 67 struct { 68 const char* input_url; 69 const char* expected_canonicalized_hostname; 70 const char* expected_canonicalized_path; 71 const char* expected_canonicalized_query; 72 } tests[] = { 73 { 74 "http://host/%25%32%35", 75 "host", 76 "/%25", 77 "" 78 }, { 79 "http://host/%25%32%35%25%32%35", 80 "host", 81 "/%25%25", 82 "" 83 }, { 84 "http://host/%2525252525252525", 85 "host", 86 "/%25", 87 "" 88 }, { 89 "http://host/asdf%25%32%35asd", 90 "host", 91 "/asdf%25asd", 92 "" 93 }, { 94 "http://host/%%%25%32%35asd%%", 95 "host", 96 "/%25%25%25asd%25%25", 97 "" 98 }, { 99 "http://host/%%%25%32%35asd%%", 100 "host", 101 "/%25%25%25asd%25%25", 102 "" 103 }, { 104 "http://www.google.com/", 105 "www.google.com", 106 "/", 107 "" 108 }, { 109 "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77" 110 "%77%77%2E%65%62%61%79%2E%63%6F%6D/", 111 "168.188.99.26", 112 "/.secure/www.ebay.com/", 113 "" 114 }, { 115 "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd" 116 "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/", 117 "195.127.0.11", 118 "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv" 119 "alidateinfoswqpcmlx=hgplmcx/", 120 "" 121 }, { 122 "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A" 123 "22%252833%252944_55%252B", 124 "host.com", 125 "/~a!b@c%23d$e%25f^00&11*22(33)44_55+", 126 "" 127 }, { 128 "http://3279880203/blah", 129 "195.127.0.11", 130 "/blah", 131 "" 132 }, { 133 "http://www.google.com/blah/..", 134 "www.google.com", 135 "/", 136 "" 137 }, { 138 "http://www.google.com/blah#fraq", 139 "www.google.com", 140 "/blah", 141 "" 142 }, { 143 "http://www.GOOgle.com/", 144 "www.google.com", 145 "/", 146 "" 147 }, { 148 "http://www.google.com.../", 149 "www.google.com", 150 "/", 151 "" 152 }, { 153 "http://www.google.com/q?", 154 "www.google.com", 155 "/q", 156 "" 157 }, { 158 "http://www.google.com/q?r?", 159 "www.google.com", 160 "/q", 161 "r?" 162 }, { 163 "http://www.google.com/q?r?s", 164 "www.google.com", 165 "/q", 166 "r?s" 167 }, { 168 "http://evil.com/foo#bar#baz", 169 "evil.com", 170 "/foo", 171 "" 172 }, { 173 "http://evil.com/foo;", 174 "evil.com", 175 "/foo;", 176 "" 177 }, { 178 "http://evil.com/foo?bar;", 179 "evil.com", 180 "/foo", 181 "bar;" 182 }, { 183 "http://notrailingslash.com", 184 "notrailingslash.com", 185 "/", 186 "" 187 }, { 188 "http://www.gotaport.com:1234/", 189 "www.gotaport.com", 190 "/", 191 "" 192 }, { 193 " http://www.google.com/ ", 194 "www.google.com", 195 "/", 196 "" 197 }, { 198 "http:// leadingspace.com/", 199 "%20leadingspace.com", 200 "/", 201 "" 202 }, { 203 "http://%20leadingspace.com/", 204 "%20leadingspace.com", 205 "/", 206 "" 207 }, { 208 "https://www.securesite.com/", 209 "www.securesite.com", 210 "/", 211 "" 212 }, { 213 "http://host.com/ab%23cd", 214 "host.com", 215 "/ab%23cd", 216 "" 217 }, { 218 "http://host%3e.com//twoslashes?more//slashes", 219 "host>.com", 220 "/twoslashes", 221 "more//slashes" 222 }, { 223 "http://host.com/abc?val=xyz#anything", 224 "host.com", 225 "/abc", 226 "val=xyz" 227 }, { 228 "http://abc:def@host.com/xyz", 229 "host.com", 230 "/xyz", 231 "" 232 }, { 233 "http://host%3e.com/abc/%2e%2e%2fdef", 234 "host>.com", 235 "/def", 236 "" 237 }, { 238 "http://.......host...com.....//abc/////def%2F%2F%2Fxyz", 239 "host.com", 240 "/abc/def/xyz", 241 "" 242 }, { 243 "ftp://host.com/foo?bar", 244 "host.com", 245 "/foo", 246 "bar" 247 }, { 248 "data:text/html;charset=utf-8,%0D%0A", 249 "", 250 "", 251 "" 252 }, { 253 "javascript:alert()", 254 "", 255 "", 256 "" 257 }, { 258 "mailto:abc (at) example.com", 259 "", 260 "", 261 "" 262 }, 263 }; 264 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) { 265 SCOPED_TRACE(StringPrintf("Test: %s", tests[i].input_url)); 266 GURL url(tests[i].input_url); 267 268 std::string canonicalized_hostname; 269 std::string canonicalized_path; 270 std::string canonicalized_query; 271 safe_browsing_util::CanonicalizeUrl(url, &canonicalized_hostname, 272 &canonicalized_path, &canonicalized_query); 273 274 EXPECT_EQ(tests[i].expected_canonicalized_hostname, 275 canonicalized_hostname); 276 EXPECT_EQ(tests[i].expected_canonicalized_path, 277 canonicalized_path); 278 EXPECT_EQ(tests[i].expected_canonicalized_query, 279 canonicalized_query); 280 } 281 } 282 283 TEST(SafeBrowsingUtilTest, GetUrlHashIndex) { 284 GURL url("http://www.evil.com/phish.html"); 285 SBFullHashResult full_hash; 286 crypto::SHA256HashString(url.host() + url.path(), 287 &full_hash.hash, 288 sizeof(SBFullHash)); 289 std::vector<SBFullHashResult> full_hashes; 290 full_hashes.push_back(full_hash); 291 292 EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url, full_hashes), 0); 293 294 url = GURL("http://www.evil.com/okay_path.html"); 295 EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url, full_hashes), -1); 296 } 297 298 TEST(SafeBrowsingUtilTest, ListIdListNameConversion) { 299 std::string list_name; 300 EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID, 301 &list_name)); 302 EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE, 303 &list_name)); 304 EXPECT_EQ(list_name, std::string(safe_browsing_util::kMalwareList)); 305 EXPECT_EQ(safe_browsing_util::MALWARE, 306 safe_browsing_util::GetListId(list_name)); 307 308 EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH, 309 &list_name)); 310 EXPECT_EQ(list_name, std::string(safe_browsing_util::kPhishingList)); 311 EXPECT_EQ(safe_browsing_util::PHISH, 312 safe_browsing_util::GetListId(list_name)); 313 314 EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL, 315 &list_name)); 316 EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinUrlList)); 317 EXPECT_EQ(safe_browsing_util::BINURL, 318 safe_browsing_util::GetListId(list_name)); 319 320 321 EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINHASH, 322 &list_name)); 323 EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinHashList)); 324 EXPECT_EQ(safe_browsing_util::BINHASH, 325 safe_browsing_util::GetListId(list_name)); 326 } 327 328 // Since the ids are saved in file, we need to make sure they don't change. 329 // Since only the last bit of each id is saved in file together with 330 // chunkids, this checks only last bit. 331 TEST(SafeBrowsingUtilTest, ListIdVerification) { 332 EXPECT_EQ(0, safe_browsing_util::MALWARE % 2); 333 EXPECT_EQ(1, safe_browsing_util::PHISH % 2); 334 EXPECT_EQ(0, safe_browsing_util::BINURL %2); 335 EXPECT_EQ(1, safe_browsing_util::BINHASH % 2); 336 } 337 338 TEST(SafeBrowsingUtilTest, StringToSBFullHashAndSBFullHashToString) { 339 // 31 chars plus the last \0 as full_hash. 340 const std::string hash_in = "12345678902234567890323456789012"; 341 SBFullHash hash_out; 342 safe_browsing_util::StringToSBFullHash(hash_in, &hash_out); 343 EXPECT_EQ(0x34333231, hash_out.prefix); 344 EXPECT_EQ(0, memcmp(hash_in.data(), hash_out.full_hash, sizeof(SBFullHash))); 345 346 std::string hash_final = safe_browsing_util::SBFullHashToString(hash_out); 347 EXPECT_EQ(hash_in, hash_final); 348 } 349