Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
      6 
      7 // TODOv3(shess): Review these changes carefully.
      8 
      9 #include <stdlib.h>
     10 
     11 #include "base/format_macros.h"
     12 #include "base/logging.h"
     13 #include "base/strings/string_number_conversions.h"
     14 #include "base/strings/string_split.h"
     15 #include "base/strings/stringprintf.h"
     16 #include "base/sys_byteorder.h"
     17 #include "base/time/time.h"
     18 #include "build/build_config.h"
     19 #include "chrome/browser/safe_browsing/protocol_parser.h"
     20 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     21 
     22 namespace {
     23 
     24 // Helper class for scanning a buffer.
     25 class BufferReader {
     26  public:
     27   BufferReader(const char* data, size_t length)
     28       : data_(data),
     29         length_(length) {
     30   }
     31 
     32   // Return info about remaining buffer data.
     33   size_t length() const {
     34     return length_;
     35   }
     36   const char* data() const {
     37     return data_;
     38   }
     39   bool empty() const {
     40     return length_ == 0;
     41   }
     42 
     43   // Remove |l| characters from the buffer.
     44   void Advance(size_t l) {
     45     DCHECK_LE(l, length());
     46     data_ += l;
     47     length_ -= l;
     48   }
     49 
     50   // Get a reference to data in the buffer.
     51   // TODO(shess): I'm not sure I like this.  Fill out a StringPiece instead?
     52   bool RefData(const void** pptr, size_t l) {
     53     if (length() < l) {
     54       Advance(length());  // poison
     55       return false;
     56     }
     57 
     58     *pptr = data();
     59     Advance(l);
     60     return true;
     61   }
     62 
     63   // Copy data out of the buffer.
     64   bool GetData(void* ptr, size_t l) {
     65     const void* buf_ptr;
     66     if (!RefData(&buf_ptr, l))
     67       return false;
     68 
     69     memcpy(ptr, buf_ptr, l);
     70     return true;
     71   }
     72 
     73   // Read a 32-bit integer in network byte order into a local uint32.
     74   bool GetNet32(uint32* i) {
     75     if (!GetData(i, sizeof(*i)))
     76       return false;
     77 
     78     *i = base::NetToHost32(*i);
     79     return true;
     80   }
     81 
     82   // Returns false if there is no data, otherwise fills |*line| with a reference
     83   // to the next line of data in the buffer.
     84   bool GetLine(base::StringPiece* line) {
     85     if (!length_)
     86       return false;
     87 
     88     // Find the end of the line, or the end of the input.
     89     size_t eol = 0;
     90     while (eol < length_ && data_[eol] != '\n') {
     91       ++eol;
     92     }
     93     line->set(data_, eol);
     94     Advance(eol);
     95 
     96     // Skip the newline if present.
     97     if (length_ && data_[0] == '\n')
     98       Advance(1);
     99 
    100     return true;
    101   }
    102 
    103   // Read out |c| colon-separated pieces from the next line.  The resulting
    104   // pieces point into the original data buffer.
    105   bool GetPieces(size_t c, std::vector<base::StringPiece>* pieces) {
    106     base::StringPiece line;
    107     if (!GetLine(&line))
    108       return false;
    109 
    110     // Find the parts separated by ':'.
    111     while (pieces->size() + 1 < c) {
    112       size_t colon_ofs = line.find(':');
    113       if (colon_ofs == base::StringPiece::npos) {
    114         Advance(length_);
    115         return false;
    116       }
    117 
    118       pieces->push_back(line.substr(0, colon_ofs));
    119       line.remove_prefix(colon_ofs + 1);
    120     }
    121 
    122     // The last piece runs to the end of the line.
    123     pieces->push_back(line);
    124     return true;
    125   }
    126 
    127  private:
    128   const char* data_;
    129   size_t length_;
    130 
    131   DISALLOW_COPY_AND_ASSIGN(BufferReader);
    132 };
    133 
    134 bool ParseGetHashMetadata(size_t hash_count,
    135                           BufferReader* reader,
    136                           std::vector<SBFullHashResult>* full_hashes) {
    137   for (size_t i = 0; i < hash_count; ++i) {
    138     base::StringPiece line;
    139     if (!reader->GetLine(&line))
    140       return false;
    141 
    142     size_t meta_data_len;
    143     if (!base::StringToSizeT(line, &meta_data_len))
    144       return false;
    145 
    146     const void* meta_data;
    147     if (!reader->RefData(&meta_data, meta_data_len))
    148       return false;
    149 
    150     if (full_hashes) {
    151       (*full_hashes)[full_hashes->size() - hash_count + i].metadata.assign(
    152           reinterpret_cast<const char*>(meta_data), meta_data_len);
    153     }
    154   }
    155   return true;
    156 }
    157 
    158 }  // namespace
    159 
    160 namespace safe_browsing {
    161 
    162 // BODY          = CACHELIFETIME LF HASHENTRY* EOF
    163 // CACHELIFETIME = DIGIT+
    164 // HASHENTRY     = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
    165 //                 HASHDATA (METADATALEN LF METADATA)*
    166 // HASHSIZE      = DIGIT+                  # Length of each full hash
    167 // NUMRESPONSES  = DIGIT+                  # Number of full hashes in HASHDATA
    168 // HASHDATA      = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
    169 // METADATALEN   = DIGIT+
    170 // METADATA      = <METADATALEN number of unsigned bytes>
    171 bool ParseGetHash(const char* chunk_data,
    172                   size_t chunk_len,
    173                   base::TimeDelta* cache_lifetime,
    174                   std::vector<SBFullHashResult>* full_hashes) {
    175   full_hashes->clear();
    176   BufferReader reader(chunk_data, chunk_len);
    177 
    178   // Parse out cache lifetime.
    179   {
    180     base::StringPiece line;
    181     if (!reader.GetLine(&line))
    182       return false;
    183 
    184     int64_t cache_lifetime_seconds;
    185     if (!base::StringToInt64(line, &cache_lifetime_seconds))
    186       return false;
    187 
    188     // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
    189     // either.  Maybe there should be a threshold involved.
    190     if (cache_lifetime_seconds < 0)
    191       return false;
    192 
    193     *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds);
    194   }
    195 
    196   while (!reader.empty()) {
    197     std::vector<base::StringPiece> cmd_parts;
    198     if (!reader.GetPieces(3, &cmd_parts))
    199       return false;
    200 
    201     SBFullHashResult full_hash;
    202     full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]);
    203 
    204     size_t hash_len;
    205     if (!base::StringToSizeT(cmd_parts[1], &hash_len))
    206       return false;
    207 
    208     // TODO(shess): Is this possible?  If not, why the length present?
    209     if (hash_len != sizeof(SBFullHash))
    210       return false;
    211 
    212     // Metadata is indicated by an optional ":m" at the end of the line.
    213     bool has_metadata = false;
    214     base::StringPiece hash_count_string = cmd_parts[2];
    215     size_t optional_colon = hash_count_string.find(':', 0);
    216     if (optional_colon != base::StringPiece::npos) {
    217       if (hash_count_string.substr(optional_colon) != ":m")
    218         return false;
    219       has_metadata = true;
    220       hash_count_string.remove_suffix(2);
    221     }
    222 
    223     size_t hash_count;
    224     if (!base::StringToSizeT(hash_count_string, &hash_count))
    225       return false;
    226 
    227     if (hash_len * hash_count > reader.length())
    228       return false;
    229 
    230     // Ignore hash results from lists we don't recognize.
    231     if (full_hash.list_id < 0) {
    232       reader.Advance(hash_len * hash_count);
    233       if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, NULL))
    234         return false;
    235       continue;
    236     }
    237 
    238     for (size_t i = 0; i < hash_count; ++i) {
    239       if (!reader.GetData(&full_hash.hash, hash_len))
    240         return false;
    241       full_hashes->push_back(full_hash);
    242     }
    243 
    244     if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, full_hashes))
    245       return false;
    246   }
    247 
    248   return reader.empty();
    249 }
    250 
    251 // BODY       = HEADER LF PREFIXES EOF
    252 // HEADER     = PREFIXSIZE ":" LENGTH
    253 // PREFIXSIZE = DIGIT+         # Size of each prefix in bytes
    254 // LENGTH     = DIGIT+         # Size of PREFIXES in bytes
    255 std::string FormatGetHash(const std::vector<SBPrefix>& prefixes) {
    256   std::string request;
    257   request.append(base::Uint64ToString(sizeof(SBPrefix)));
    258   request.append(":");
    259   request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size()));
    260   request.append("\n");
    261 
    262   // SBPrefix values are read without concern for byte order, so write back the
    263   // same way.
    264   for (size_t i = 0; i < prefixes.size(); ++i) {
    265     request.append(reinterpret_cast<const char*>(&prefixes[i]),
    266                    sizeof(SBPrefix));
    267   }
    268 
    269   return request;
    270 }
    271 
    272 bool ParseUpdate(const char* chunk_data,
    273                  size_t chunk_len,
    274                  size_t* next_update_sec,
    275                  bool* reset,
    276                  std::vector<SBChunkDelete>* deletes,
    277                  std::vector<ChunkUrl>* chunk_urls) {
    278   DCHECK(next_update_sec);
    279   DCHECK(deletes);
    280   DCHECK(chunk_urls);
    281 
    282   BufferReader reader(chunk_data, chunk_len);
    283 
    284   // Populated below.
    285   std::string list_name;
    286 
    287   while (!reader.empty()) {
    288     std::vector<base::StringPiece> pieces;
    289     if (!reader.GetPieces(2, &pieces))
    290       return false;
    291 
    292     base::StringPiece& command = pieces[0];
    293 
    294     // Differentiate on the first character of the command (which is usually
    295     // only one character, with the exception of the 'ad' and 'sd' commands).
    296     switch (command[0]) {
    297       case 'a':
    298       case 's': {
    299         // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
    300         // have also parsed the list name before getting here, or the add-del
    301         // or sub-del will have no context.
    302         if (list_name.empty() || (command != "ad" && command != "sd"))
    303           return false;
    304         SBChunkDelete chunk_delete;
    305         chunk_delete.is_sub_del = command[0] == 's';
    306         StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del);
    307         chunk_delete.list_name = list_name;
    308         deletes->push_back(chunk_delete);
    309         break;
    310       }
    311 
    312       case 'i':
    313         // The line providing the name of the list (i.e. 'goog-phish-shavar').
    314         list_name = pieces[1].as_string();
    315         break;
    316 
    317       case 'n':
    318         // The line providing the next earliest time (in seconds) to re-query.
    319         if (!base::StringToSizeT(pieces[1], next_update_sec))
    320           return false;
    321         break;
    322 
    323       case 'u': {
    324         ChunkUrl chunk_url;
    325         chunk_url.url = pieces[1].as_string();  // Skip the initial "u:".
    326         chunk_url.list_name = list_name;
    327         chunk_urls->push_back(chunk_url);
    328         break;
    329       }
    330 
    331       case 'r':
    332         if (pieces[1] != "pleasereset")
    333           return false;
    334         *reset = true;
    335         break;
    336 
    337       default:
    338         // According to the spec, we ignore commands we don't understand.
    339         // TODO(shess): Does this apply to r:unknown or n:not-integer?
    340         break;
    341     }
    342   }
    343 
    344   return true;
    345 }
    346 
    347 // BODY      = (UINT32 CHUNKDATA)+
    348 // UINT32    = Unsigned 32-bit integer in network byte order
    349 // CHUNKDATA = Encoded ChunkData protocol message
    350 bool ParseChunk(const char* data,
    351                 size_t length,
    352                 ScopedVector<SBChunkData>* chunks) {
    353   BufferReader reader(data, length);
    354 
    355   while (!reader.empty()) {
    356     uint32 l = 0;
    357     if (!reader.GetNet32(&l) || l == 0 || l > reader.length())
    358       return false;
    359 
    360     const void* p = NULL;
    361     if (!reader.RefData(&p, l))
    362       return false;
    363 
    364     scoped_ptr<SBChunkData> chunk(new SBChunkData());
    365     if (!chunk->ParseFrom(reinterpret_cast<const unsigned char*>(p), l))
    366       return false;
    367 
    368     chunks->push_back(chunk.release());
    369   }
    370 
    371   DCHECK(reader.empty());
    372   return true;
    373 }
    374 
    375 // LIST      = LISTNAME ";" LISTINFO (":" LISTINFO)*
    376 // LISTINFO  = CHUNKTYPE ":" CHUNKLIST
    377 // CHUNKTYPE = "a" | "s"
    378 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
    379 // NUMBER    = DIGIT+
    380 // RANGE     = NUMBER "-" NUMBER
    381 std::string FormatList(const SBListChunkRanges& list) {
    382   std::string formatted_results = list.name;
    383   formatted_results.append(";");
    384 
    385   if (!list.adds.empty())
    386     formatted_results.append("a:").append(list.adds);
    387   if (!list.adds.empty() && !list.subs.empty())
    388     formatted_results.append(":");
    389   if (!list.subs.empty())
    390     formatted_results.append("s:").append(list.subs);
    391   formatted_results.append("\n");
    392 
    393   return formatted_results;
    394 }
    395 
    396 }  // namespace safe_browsing
    397