Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
      6 
      7 #include <stdlib.h>
      8 
      9 #include "chrome/browser/safe_browsing/protocol_parser.h"
     10 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     11 
     12 #include "build/build_config.h"
     13 
     14 #if defined(OS_WIN)
     15 #include <Winsock2.h>
     16 #elif defined(OS_POSIX)
     17 #include <arpa/inet.h>
     18 #endif
     19 
     20 #include "base/format_macros.h"
     21 #include "base/logging.h"
     22 #include "base/string_split.h"
     23 #include "base/string_util.h"
     24 
     25 namespace {
     26 // Helper function for quick scans of a line oriented protocol. Note that we use
     27 //   std::string::assign(const charT* s, size_type n)
     28 // to copy data into 'line'. This form of 'assign' does not call strlen on
     29 // 'input', which is binary data and is not NULL terminated. 'input' may also
     30 // contain valid NULL bytes in the payload, which a strlen based copy would
     31 // truncate.
     32 bool GetLine(const char* input, int input_len, std::string* line) {
     33   const char* pos = input;
     34   while (pos && (pos - input < input_len)) {
     35     if (*pos == '\n') {
     36       line->assign(input, pos - input);
     37       return true;
     38     }
     39     ++pos;
     40   }
     41   return false;
     42 }
     43 }
     44 
     45 //------------------------------------------------------------------------------
     46 // SafeBrowsingParser implementation
     47 
     48 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
     49 }
     50 
     51 bool SafeBrowsingProtocolParser::ParseGetHash(
     52     const char* chunk_data,
     53     int chunk_len,
     54     const std::string& key,
     55     bool* re_key,
     56     std::vector<SBFullHashResult>* full_hashes) {
     57   full_hashes->clear();
     58   int length = chunk_len;
     59   const char* data = chunk_data;
     60 
     61   int offset;
     62   std::string line;
     63   if (!key.empty()) {
     64     if (!GetLine(data, length, &line))
     65       return false;  // Error! Bad GetHash result.
     66 
     67     if (line == "e:pleaserekey") {
     68       *re_key = true;
     69       return true;
     70     }
     71 
     72     offset = static_cast<int>(line.size()) + 1;
     73     data += offset;
     74     length -= offset;
     75 
     76     if (!safe_browsing_util::VerifyMAC(key, line, data, length))
     77       return false;
     78   }
     79 
     80   while (length > 0) {
     81     if (!GetLine(data, length, &line))
     82       return false;
     83 
     84     offset = static_cast<int>(line.size()) + 1;
     85     data += offset;
     86     length -= offset;
     87 
     88     std::vector<std::string> cmd_parts;
     89     base::SplitString(line, ':', &cmd_parts);
     90     if (cmd_parts.size() != 3)
     91       return false;
     92 
     93     SBFullHashResult full_hash;
     94     full_hash.list_name = cmd_parts[0];
     95     full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
     96     int full_hash_len = atoi(cmd_parts[2].c_str());
     97 
     98     // Ignore hash results from lists we don't recognize.
     99     if (safe_browsing_util::GetListId(full_hash.list_name) < 0) {
    100       data += full_hash_len;
    101       length -= full_hash_len;
    102       continue;
    103     }
    104 
    105     while (full_hash_len > 0) {
    106       DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash));
    107       memcpy(&full_hash.hash, data, sizeof(SBFullHash));
    108       full_hashes->push_back(full_hash);
    109       data += sizeof(SBFullHash);
    110       length -= sizeof(SBFullHash);
    111       full_hash_len -= sizeof(SBFullHash);
    112     }
    113   }
    114 
    115   return length == 0;
    116 }
    117 
    118 void SafeBrowsingProtocolParser::FormatGetHash(
    119    const std::vector<SBPrefix>& prefixes, std::string* request) {
    120   DCHECK(request);
    121 
    122   // Format the request for GetHash.
    123   request->append(StringPrintf("%" PRIuS ":%" PRIuS "\n",
    124                                sizeof(SBPrefix),
    125                                sizeof(SBPrefix) * prefixes.size()));
    126   for (size_t i = 0; i < prefixes.size(); ++i) {
    127     request->append(reinterpret_cast<const char*>(&prefixes[i]),
    128                     sizeof(SBPrefix));
    129   }
    130 }
    131 
    132 bool SafeBrowsingProtocolParser::ParseUpdate(
    133     const char* chunk_data,
    134     int chunk_len,
    135     const std::string& key,
    136     int* next_update_sec,
    137     bool* re_key,
    138     bool* reset,
    139     std::vector<SBChunkDelete>* deletes,
    140     std::vector<ChunkUrl>* chunk_urls) {
    141   DCHECK(next_update_sec);
    142   DCHECK(deletes);
    143   DCHECK(chunk_urls);
    144 
    145   int length = chunk_len;
    146   const char* data = chunk_data;
    147 
    148   // Populated below.
    149   std::string list_name;
    150 
    151   while (length > 0) {
    152     std::string cmd_line;
    153     if (!GetLine(data, length, &cmd_line))
    154       return false;  // Error: bad list format!
    155 
    156     std::vector<std::string> cmd_parts;
    157     base::SplitString(cmd_line, ':', &cmd_parts);
    158     if (cmd_parts.empty())
    159       return false;
    160     const std::string& command = cmd_parts[0];
    161     if (cmd_parts.size() != 2 && command[0] != 'u')
    162       return false;
    163 
    164     const int consumed = static_cast<int>(cmd_line.size()) + 1;
    165     data += consumed;
    166     length -= consumed;
    167     if (length < 0)
    168       return false;  // Parsing error.
    169 
    170     // Differentiate on the first character of the command (which is usually
    171     // only one character, with the exception of the 'ad' and 'sd' commands).
    172     switch (command[0]) {
    173       case 'a':
    174       case 's': {
    175         // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
    176         // have also parsed the list name before getting here, or the add-del
    177         // or sub-del will have no context.
    178         if (command.size() != 2 || command[1] != 'd' || list_name.empty())
    179           return false;
    180         SBChunkDelete chunk_delete;
    181         chunk_delete.is_sub_del = command[0] == 's';
    182         StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
    183         chunk_delete.list_name = list_name;
    184         deletes->push_back(chunk_delete);
    185         break;
    186       }
    187 
    188       case 'e':
    189         if (cmd_parts[1] != "pleaserekey")
    190           return false;
    191         *re_key = true;
    192         break;
    193 
    194       case 'i':
    195         // The line providing the name of the list (i.e. 'goog-phish-shavar').
    196         list_name = cmd_parts[1];
    197         break;
    198 
    199       case 'm':
    200         // Verify that the MAC of the remainer of this chunk is what we expect.
    201         if (!key.empty() &&
    202             !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length))
    203           return false;
    204         break;
    205 
    206       case 'n':
    207         // The line providing the next earliest time (in seconds) to re-query.
    208         *next_update_sec = atoi(cmd_parts[1].c_str());
    209         break;
    210 
    211       case 'u': {
    212         // The redirect command is of the form: u:<url>,<mac> where <url> can
    213         // contain multiple colons, commas or any valid URL characters. We scan
    214         // backwards in the string looking for the first ',' we encounter and
    215         // assume that everything before that is the URL and everything after
    216         // is the MAC (if the MAC was requested).
    217         std::string mac;
    218         std::string redirect_url(cmd_line, 2);  // Skip the initial "u:".
    219         if (!key.empty()) {
    220           std::string::size_type mac_pos = redirect_url.rfind(',');
    221           if (mac_pos == std::string::npos)
    222             return false;
    223           mac = redirect_url.substr(mac_pos + 1);
    224           redirect_url = redirect_url.substr(0, mac_pos);
    225         }
    226 
    227         ChunkUrl chunk_url;
    228         chunk_url.url = redirect_url;
    229         chunk_url.list_name = list_name;
    230         if (!key.empty())
    231           chunk_url.mac = mac;
    232         chunk_urls->push_back(chunk_url);
    233         break;
    234       }
    235 
    236       case 'r':
    237         if (cmd_parts[1] != "pleasereset")
    238           return false;
    239         *reset = true;
    240         break;
    241 
    242       default:
    243         // According to the spec, we ignore commands we don't understand.
    244         break;
    245     }
    246   }
    247 
    248   return true;
    249 }
    250 
    251 bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name,
    252                                             const char* data,
    253                                             int length,
    254                                             const std::string& key,
    255                                             const std::string& mac,
    256                                             bool* re_key,
    257                                             SBChunkList* chunks) {
    258   int remaining = length;
    259   const char* chunk_data = data;
    260 
    261   if (!key.empty() &&
    262       !safe_browsing_util::VerifyMAC(key, mac, data, length)) {
    263     return false;
    264   }
    265 
    266   while (remaining > 0) {
    267     std::string cmd_line;
    268     if (!GetLine(chunk_data, length, &cmd_line))
    269       return false;  // Error: bad chunk format!
    270 
    271     const int line_len = static_cast<int>(cmd_line.length()) + 1;
    272     chunk_data += line_len;
    273     remaining -= line_len;
    274     std::vector<std::string> cmd_parts;
    275     base::SplitString(cmd_line, ':', &cmd_parts);
    276 
    277     // Handle a possible re-key command.
    278     if (cmd_parts.size() != 4) {
    279       if (cmd_parts.size() == 2 &&
    280           cmd_parts[0] == "e" &&
    281           cmd_parts[1] == "pleaserekey") {
    282         *re_key = true;
    283         continue;
    284       }
    285       return false;
    286     }
    287 
    288     // Process the chunk data.
    289     const int chunk_number = atoi(cmd_parts[1].c_str());
    290     const int hash_len = atoi(cmd_parts[2].c_str());
    291     if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
    292       VLOG(1) << "ParseChunk got unknown hashlen " << hash_len;
    293       return false;
    294     }
    295 
    296     const int chunk_len = atoi(cmd_parts[3].c_str());
    297 
    298     if (remaining < chunk_len)
    299       return false;  // parse error.
    300 
    301     chunks->push_back(SBChunk());
    302     chunks->back().chunk_number = chunk_number;
    303 
    304     if (cmd_parts[0] == "a") {
    305       chunks->back().is_add = true;
    306       if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len,
    307                          &chunks->back().hosts))
    308         return false;  // Parse error.
    309     } else if (cmd_parts[0] == "s") {
    310       chunks->back().is_add = false;
    311       if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len,
    312                          &chunks->back().hosts))
    313         return false;  // Parse error.
    314     } else {
    315       NOTREACHED();
    316       return false;
    317     }
    318 
    319     chunk_data += chunk_len;
    320     remaining -= chunk_len;
    321     DCHECK_LE(0, remaining);
    322   }
    323 
    324   DCHECK(remaining == 0);
    325 
    326   return true;
    327 }
    328 
    329 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name,
    330                                                const char* data,
    331                                                int data_len,
    332                                                int hash_len,
    333                                                std::deque<SBChunkHost>* hosts) {
    334   const char* chunk_data = data;
    335   int remaining = data_len;
    336   int prefix_count;
    337   SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
    338       SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
    339 
    340   if (list_name == safe_browsing_util::kBinHashList) {
    341     // kBinHashList only contains prefixes, no HOSTKEY and COUNT.
    342     DCHECK_EQ(0, remaining % hash_len);
    343     prefix_count = remaining / hash_len;
    344     SBChunkHost chunk_host;
    345     chunk_host.host = 0;
    346     chunk_host.entry = SBEntry::Create(type, prefix_count);
    347     hosts->push_back(chunk_host);
    348     if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
    349       return false;
    350   } else {
    351     SBPrefix host;
    352     const int min_size = sizeof(SBPrefix) + 1;
    353     while (remaining >= min_size) {
    354       ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
    355       SBChunkHost chunk_host;
    356       chunk_host.host = host;
    357       chunk_host.entry = SBEntry::Create(type, prefix_count);
    358       hosts->push_back(chunk_host);
    359       if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
    360                         prefix_count))
    361         return false;
    362     }
    363   }
    364   return remaining == 0;
    365 }
    366 
    367 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name,
    368                                                const char* data,
    369                                                int data_len,
    370                                                int hash_len,
    371                                                std::deque<SBChunkHost>* hosts) {
    372   int remaining = data_len;
    373   const char* chunk_data = data;
    374   int prefix_count;
    375   SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
    376       SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
    377 
    378   if (list_name == safe_browsing_util::kBinHashList) {
    379     SBChunkHost chunk_host;
    380     // Set host to 0 and it won't be used for kBinHashList.
    381     chunk_host.host = 0;
    382     // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY
    383     // and COUNT. |add_chunk_number| is int32.
    384     prefix_count = remaining / (sizeof(int32) + hash_len);
    385     chunk_host.entry = SBEntry::Create(type, prefix_count);
    386     if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
    387       return false;
    388     hosts->push_back(chunk_host);
    389   } else {
    390     SBPrefix host;
    391     const int min_size = 2 * sizeof(SBPrefix) + 1;
    392     while (remaining >= min_size) {
    393       ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
    394       SBChunkHost chunk_host;
    395       chunk_host.host = host;
    396       chunk_host.entry = SBEntry::Create(type, prefix_count);
    397       hosts->push_back(chunk_host);
    398       if (prefix_count == 0) {
    399         // There is only an add chunk number (no prefixes).
    400         chunk_host.entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining));
    401         continue;
    402       }
    403       if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
    404                         prefix_count))
    405         return false;
    406     }
    407   }
    408   return remaining == 0;
    409 }
    410 
    411 void SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
    412     const char** data, int* remaining, SBPrefix* host, int* count) {
    413   // Next 4 bytes are the host prefix.
    414   memcpy(host, *data, sizeof(SBPrefix));
    415   *data += sizeof(SBPrefix);
    416   *remaining -= sizeof(SBPrefix);
    417 
    418   // Next 1 byte is the prefix count (could be zero, but never negative).
    419   *count = static_cast<unsigned char>(**data);
    420   *data += 1;
    421   *remaining -= 1;
    422 }
    423 
    424 int SafeBrowsingProtocolParser::ReadChunkId(
    425     const char** data, int* remaining) {
    426   int chunk_number;
    427   memcpy(&chunk_number, *data, sizeof(chunk_number));
    428   *data += sizeof(chunk_number);
    429   *remaining -= sizeof(chunk_number);
    430   return htonl(chunk_number);
    431 }
    432 
    433 bool SafeBrowsingProtocolParser::ReadPrefixes(
    434     const char** data, int* remaining, SBEntry* entry, int count) {
    435   int hash_len = entry->HashLen();
    436   for (int i = 0; i < count; ++i) {
    437     if (entry->IsSub()) {
    438       entry->SetChunkIdAtPrefix(i, ReadChunkId(data, remaining));
    439       if (*remaining <= 0)
    440         return false;
    441     }
    442 
    443     if (entry->IsPrefix()) {
    444       entry->SetPrefixAt(i, *reinterpret_cast<const SBPrefix*>(*data));
    445     } else {
    446       entry->SetFullHashAt(i, *reinterpret_cast<const SBFullHash*>(*data));
    447     }
    448     *data += hash_len;
    449     *remaining -= hash_len;
    450     if (*remaining < 0)
    451       return false;
    452   }
    453 
    454   return true;
    455 }
    456 
    457 bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data,
    458                                              int chunk_length,
    459                                              std::string* client_key,
    460                                              std::string* wrapped_key) {
    461   DCHECK(client_key && wrapped_key);
    462   client_key->clear();
    463   wrapped_key->clear();
    464 
    465   const char* data = chunk_data;
    466   int remaining = chunk_length;
    467 
    468   while (remaining > 0) {
    469     std::string line;
    470     if (!GetLine(data, remaining, &line))
    471       return false;
    472 
    473     std::vector<std::string> cmd_parts;
    474     base::SplitString(line, ':', &cmd_parts);
    475     if (cmd_parts.size() != 3)
    476       return false;
    477 
    478     if (static_cast<int>(cmd_parts[2].size()) != atoi(cmd_parts[1].c_str()))
    479       return false;
    480 
    481     if (cmd_parts[0] == "clientkey") {
    482       client_key->assign(cmd_parts[2]);
    483     } else if (cmd_parts[0] == "wrappedkey") {
    484       wrapped_key->assign(cmd_parts[2]);
    485     } else {
    486       return false;
    487     }
    488 
    489     data += line.size() + 1;
    490     remaining -= static_cast<int>(line.size()) + 1;
    491   }
    492 
    493   if (client_key->empty() || wrapped_key->empty())
    494     return false;
    495 
    496   return true;
    497 }
    498