1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Parse the data returned from the SafeBrowsing v2.1 protocol response. 6 7 #include <stdlib.h> 8 9 #include "chrome/browser/safe_browsing/protocol_parser.h" 10 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 11 12 #include "build/build_config.h" 13 14 #if defined(OS_WIN) 15 #include <Winsock2.h> 16 #elif defined(OS_POSIX) 17 #include <arpa/inet.h> 18 #endif 19 20 #include "base/format_macros.h" 21 #include "base/logging.h" 22 #include "base/string_split.h" 23 #include "base/string_util.h" 24 25 namespace { 26 // Helper function for quick scans of a line oriented protocol. Note that we use 27 // std::string::assign(const charT* s, size_type n) 28 // to copy data into 'line'. This form of 'assign' does not call strlen on 29 // 'input', which is binary data and is not NULL terminated. 'input' may also 30 // contain valid NULL bytes in the payload, which a strlen based copy would 31 // truncate. 32 bool GetLine(const char* input, int input_len, std::string* line) { 33 const char* pos = input; 34 while (pos && (pos - input < input_len)) { 35 if (*pos == '\n') { 36 line->assign(input, pos - input); 37 return true; 38 } 39 ++pos; 40 } 41 return false; 42 } 43 } 44 45 //------------------------------------------------------------------------------ 46 // SafeBrowsingParser implementation 47 48 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() { 49 } 50 51 bool SafeBrowsingProtocolParser::ParseGetHash( 52 const char* chunk_data, 53 int chunk_len, 54 const std::string& key, 55 bool* re_key, 56 std::vector<SBFullHashResult>* full_hashes) { 57 full_hashes->clear(); 58 int length = chunk_len; 59 const char* data = chunk_data; 60 61 int offset; 62 std::string line; 63 if (!key.empty()) { 64 if (!GetLine(data, length, &line)) 65 return false; // Error! Bad GetHash result. 66 67 if (line == "e:pleaserekey") { 68 *re_key = true; 69 return true; 70 } 71 72 offset = static_cast<int>(line.size()) + 1; 73 data += offset; 74 length -= offset; 75 76 if (!safe_browsing_util::VerifyMAC(key, line, data, length)) 77 return false; 78 } 79 80 while (length > 0) { 81 if (!GetLine(data, length, &line)) 82 return false; 83 84 offset = static_cast<int>(line.size()) + 1; 85 data += offset; 86 length -= offset; 87 88 std::vector<std::string> cmd_parts; 89 base::SplitString(line, ':', &cmd_parts); 90 if (cmd_parts.size() != 3) 91 return false; 92 93 SBFullHashResult full_hash; 94 full_hash.list_name = cmd_parts[0]; 95 full_hash.add_chunk_id = atoi(cmd_parts[1].c_str()); 96 int full_hash_len = atoi(cmd_parts[2].c_str()); 97 98 // Ignore hash results from lists we don't recognize. 99 if (safe_browsing_util::GetListId(full_hash.list_name) < 0) { 100 data += full_hash_len; 101 length -= full_hash_len; 102 continue; 103 } 104 105 while (full_hash_len > 0) { 106 DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash)); 107 memcpy(&full_hash.hash, data, sizeof(SBFullHash)); 108 full_hashes->push_back(full_hash); 109 data += sizeof(SBFullHash); 110 length -= sizeof(SBFullHash); 111 full_hash_len -= sizeof(SBFullHash); 112 } 113 } 114 115 return length == 0; 116 } 117 118 void SafeBrowsingProtocolParser::FormatGetHash( 119 const std::vector<SBPrefix>& prefixes, std::string* request) { 120 DCHECK(request); 121 122 // Format the request for GetHash. 123 request->append(StringPrintf("%" PRIuS ":%" PRIuS "\n", 124 sizeof(SBPrefix), 125 sizeof(SBPrefix) * prefixes.size())); 126 for (size_t i = 0; i < prefixes.size(); ++i) { 127 request->append(reinterpret_cast<const char*>(&prefixes[i]), 128 sizeof(SBPrefix)); 129 } 130 } 131 132 bool SafeBrowsingProtocolParser::ParseUpdate( 133 const char* chunk_data, 134 int chunk_len, 135 const std::string& key, 136 int* next_update_sec, 137 bool* re_key, 138 bool* reset, 139 std::vector<SBChunkDelete>* deletes, 140 std::vector<ChunkUrl>* chunk_urls) { 141 DCHECK(next_update_sec); 142 DCHECK(deletes); 143 DCHECK(chunk_urls); 144 145 int length = chunk_len; 146 const char* data = chunk_data; 147 148 // Populated below. 149 std::string list_name; 150 151 while (length > 0) { 152 std::string cmd_line; 153 if (!GetLine(data, length, &cmd_line)) 154 return false; // Error: bad list format! 155 156 std::vector<std::string> cmd_parts; 157 base::SplitString(cmd_line, ':', &cmd_parts); 158 if (cmd_parts.empty()) 159 return false; 160 const std::string& command = cmd_parts[0]; 161 if (cmd_parts.size() != 2 && command[0] != 'u') 162 return false; 163 164 const int consumed = static_cast<int>(cmd_line.size()) + 1; 165 data += consumed; 166 length -= consumed; 167 if (length < 0) 168 return false; // Parsing error. 169 170 // Differentiate on the first character of the command (which is usually 171 // only one character, with the exception of the 'ad' and 'sd' commands). 172 switch (command[0]) { 173 case 'a': 174 case 's': { 175 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must 176 // have also parsed the list name before getting here, or the add-del 177 // or sub-del will have no context. 178 if (command.size() != 2 || command[1] != 'd' || list_name.empty()) 179 return false; 180 SBChunkDelete chunk_delete; 181 chunk_delete.is_sub_del = command[0] == 's'; 182 StringToRanges(cmd_parts[1], &chunk_delete.chunk_del); 183 chunk_delete.list_name = list_name; 184 deletes->push_back(chunk_delete); 185 break; 186 } 187 188 case 'e': 189 if (cmd_parts[1] != "pleaserekey") 190 return false; 191 *re_key = true; 192 break; 193 194 case 'i': 195 // The line providing the name of the list (i.e. 'goog-phish-shavar'). 196 list_name = cmd_parts[1]; 197 break; 198 199 case 'm': 200 // Verify that the MAC of the remainer of this chunk is what we expect. 201 if (!key.empty() && 202 !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length)) 203 return false; 204 break; 205 206 case 'n': 207 // The line providing the next earliest time (in seconds) to re-query. 208 *next_update_sec = atoi(cmd_parts[1].c_str()); 209 break; 210 211 case 'u': { 212 // The redirect command is of the form: u:<url>,<mac> where <url> can 213 // contain multiple colons, commas or any valid URL characters. We scan 214 // backwards in the string looking for the first ',' we encounter and 215 // assume that everything before that is the URL and everything after 216 // is the MAC (if the MAC was requested). 217 std::string mac; 218 std::string redirect_url(cmd_line, 2); // Skip the initial "u:". 219 if (!key.empty()) { 220 std::string::size_type mac_pos = redirect_url.rfind(','); 221 if (mac_pos == std::string::npos) 222 return false; 223 mac = redirect_url.substr(mac_pos + 1); 224 redirect_url = redirect_url.substr(0, mac_pos); 225 } 226 227 ChunkUrl chunk_url; 228 chunk_url.url = redirect_url; 229 chunk_url.list_name = list_name; 230 if (!key.empty()) 231 chunk_url.mac = mac; 232 chunk_urls->push_back(chunk_url); 233 break; 234 } 235 236 case 'r': 237 if (cmd_parts[1] != "pleasereset") 238 return false; 239 *reset = true; 240 break; 241 242 default: 243 // According to the spec, we ignore commands we don't understand. 244 break; 245 } 246 } 247 248 return true; 249 } 250 251 bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name, 252 const char* data, 253 int length, 254 const std::string& key, 255 const std::string& mac, 256 bool* re_key, 257 SBChunkList* chunks) { 258 int remaining = length; 259 const char* chunk_data = data; 260 261 if (!key.empty() && 262 !safe_browsing_util::VerifyMAC(key, mac, data, length)) { 263 return false; 264 } 265 266 while (remaining > 0) { 267 std::string cmd_line; 268 if (!GetLine(chunk_data, length, &cmd_line)) 269 return false; // Error: bad chunk format! 270 271 const int line_len = static_cast<int>(cmd_line.length()) + 1; 272 chunk_data += line_len; 273 remaining -= line_len; 274 std::vector<std::string> cmd_parts; 275 base::SplitString(cmd_line, ':', &cmd_parts); 276 277 // Handle a possible re-key command. 278 if (cmd_parts.size() != 4) { 279 if (cmd_parts.size() == 2 && 280 cmd_parts[0] == "e" && 281 cmd_parts[1] == "pleaserekey") { 282 *re_key = true; 283 continue; 284 } 285 return false; 286 } 287 288 // Process the chunk data. 289 const int chunk_number = atoi(cmd_parts[1].c_str()); 290 const int hash_len = atoi(cmd_parts[2].c_str()); 291 if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) { 292 VLOG(1) << "ParseChunk got unknown hashlen " << hash_len; 293 return false; 294 } 295 296 const int chunk_len = atoi(cmd_parts[3].c_str()); 297 298 if (remaining < chunk_len) 299 return false; // parse error. 300 301 chunks->push_back(SBChunk()); 302 chunks->back().chunk_number = chunk_number; 303 304 if (cmd_parts[0] == "a") { 305 chunks->back().is_add = true; 306 if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len, 307 &chunks->back().hosts)) 308 return false; // Parse error. 309 } else if (cmd_parts[0] == "s") { 310 chunks->back().is_add = false; 311 if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len, 312 &chunks->back().hosts)) 313 return false; // Parse error. 314 } else { 315 NOTREACHED(); 316 return false; 317 } 318 319 chunk_data += chunk_len; 320 remaining -= chunk_len; 321 DCHECK_LE(0, remaining); 322 } 323 324 DCHECK(remaining == 0); 325 326 return true; 327 } 328 329 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name, 330 const char* data, 331 int data_len, 332 int hash_len, 333 std::deque<SBChunkHost>* hosts) { 334 const char* chunk_data = data; 335 int remaining = data_len; 336 int prefix_count; 337 SBEntry::Type type = hash_len == sizeof(SBPrefix) ? 338 SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH; 339 340 if (list_name == safe_browsing_util::kBinHashList) { 341 // kBinHashList only contains prefixes, no HOSTKEY and COUNT. 342 DCHECK_EQ(0, remaining % hash_len); 343 prefix_count = remaining / hash_len; 344 SBChunkHost chunk_host; 345 chunk_host.host = 0; 346 chunk_host.entry = SBEntry::Create(type, prefix_count); 347 hosts->push_back(chunk_host); 348 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count)) 349 return false; 350 } else { 351 SBPrefix host; 352 const int min_size = sizeof(SBPrefix) + 1; 353 while (remaining >= min_size) { 354 ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); 355 SBChunkHost chunk_host; 356 chunk_host.host = host; 357 chunk_host.entry = SBEntry::Create(type, prefix_count); 358 hosts->push_back(chunk_host); 359 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, 360 prefix_count)) 361 return false; 362 } 363 } 364 return remaining == 0; 365 } 366 367 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name, 368 const char* data, 369 int data_len, 370 int hash_len, 371 std::deque<SBChunkHost>* hosts) { 372 int remaining = data_len; 373 const char* chunk_data = data; 374 int prefix_count; 375 SBEntry::Type type = hash_len == sizeof(SBPrefix) ? 376 SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH; 377 378 if (list_name == safe_browsing_util::kBinHashList) { 379 SBChunkHost chunk_host; 380 // Set host to 0 and it won't be used for kBinHashList. 381 chunk_host.host = 0; 382 // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY 383 // and COUNT. |add_chunk_number| is int32. 384 prefix_count = remaining / (sizeof(int32) + hash_len); 385 chunk_host.entry = SBEntry::Create(type, prefix_count); 386 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count)) 387 return false; 388 hosts->push_back(chunk_host); 389 } else { 390 SBPrefix host; 391 const int min_size = 2 * sizeof(SBPrefix) + 1; 392 while (remaining >= min_size) { 393 ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); 394 SBChunkHost chunk_host; 395 chunk_host.host = host; 396 chunk_host.entry = SBEntry::Create(type, prefix_count); 397 hosts->push_back(chunk_host); 398 if (prefix_count == 0) { 399 // There is only an add chunk number (no prefixes). 400 chunk_host.entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining)); 401 continue; 402 } 403 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, 404 prefix_count)) 405 return false; 406 } 407 } 408 return remaining == 0; 409 } 410 411 void SafeBrowsingProtocolParser::ReadHostAndPrefixCount( 412 const char** data, int* remaining, SBPrefix* host, int* count) { 413 // Next 4 bytes are the host prefix. 414 memcpy(host, *data, sizeof(SBPrefix)); 415 *data += sizeof(SBPrefix); 416 *remaining -= sizeof(SBPrefix); 417 418 // Next 1 byte is the prefix count (could be zero, but never negative). 419 *count = static_cast<unsigned char>(**data); 420 *data += 1; 421 *remaining -= 1; 422 } 423 424 int SafeBrowsingProtocolParser::ReadChunkId( 425 const char** data, int* remaining) { 426 int chunk_number; 427 memcpy(&chunk_number, *data, sizeof(chunk_number)); 428 *data += sizeof(chunk_number); 429 *remaining -= sizeof(chunk_number); 430 return htonl(chunk_number); 431 } 432 433 bool SafeBrowsingProtocolParser::ReadPrefixes( 434 const char** data, int* remaining, SBEntry* entry, int count) { 435 int hash_len = entry->HashLen(); 436 for (int i = 0; i < count; ++i) { 437 if (entry->IsSub()) { 438 entry->SetChunkIdAtPrefix(i, ReadChunkId(data, remaining)); 439 if (*remaining <= 0) 440 return false; 441 } 442 443 if (entry->IsPrefix()) { 444 entry->SetPrefixAt(i, *reinterpret_cast<const SBPrefix*>(*data)); 445 } else { 446 entry->SetFullHashAt(i, *reinterpret_cast<const SBFullHash*>(*data)); 447 } 448 *data += hash_len; 449 *remaining -= hash_len; 450 if (*remaining < 0) 451 return false; 452 } 453 454 return true; 455 } 456 457 bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data, 458 int chunk_length, 459 std::string* client_key, 460 std::string* wrapped_key) { 461 DCHECK(client_key && wrapped_key); 462 client_key->clear(); 463 wrapped_key->clear(); 464 465 const char* data = chunk_data; 466 int remaining = chunk_length; 467 468 while (remaining > 0) { 469 std::string line; 470 if (!GetLine(data, remaining, &line)) 471 return false; 472 473 std::vector<std::string> cmd_parts; 474 base::SplitString(line, ':', &cmd_parts); 475 if (cmd_parts.size() != 3) 476 return false; 477 478 if (static_cast<int>(cmd_parts[2].size()) != atoi(cmd_parts[1].c_str())) 479 return false; 480 481 if (cmd_parts[0] == "clientkey") { 482 client_key->assign(cmd_parts[2]); 483 } else if (cmd_parts[0] == "wrappedkey") { 484 wrapped_key->assign(cmd_parts[2]); 485 } else { 486 return false; 487 } 488 489 data += line.size() + 1; 490 remaining -= static_cast<int>(line.size()) + 1; 491 } 492 493 if (client_key->empty() || wrapped_key->empty()) 494 return false; 495 496 return true; 497 } 498