1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Parse the data returned from the SafeBrowsing v2.1 protocol response. 6 7 // TODOv3(shess): Review these changes carefully. 8 9 #include <stdlib.h> 10 11 #include "base/format_macros.h" 12 #include "base/logging.h" 13 #include "base/strings/string_number_conversions.h" 14 #include "base/strings/string_split.h" 15 #include "base/strings/stringprintf.h" 16 #include "base/sys_byteorder.h" 17 #include "base/time/time.h" 18 #include "build/build_config.h" 19 #include "chrome/browser/safe_browsing/protocol_parser.h" 20 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 21 22 namespace { 23 24 // Helper class for scanning a buffer. 25 class BufferReader { 26 public: 27 BufferReader(const char* data, size_t length) 28 : data_(data), 29 length_(length) { 30 } 31 32 // Return info about remaining buffer data. 33 size_t length() const { 34 return length_; 35 } 36 const char* data() const { 37 return data_; 38 } 39 bool empty() const { 40 return length_ == 0; 41 } 42 43 // Remove |l| characters from the buffer. 44 void Advance(size_t l) { 45 DCHECK_LE(l, length()); 46 data_ += l; 47 length_ -= l; 48 } 49 50 // Get a reference to data in the buffer. 51 // TODO(shess): I'm not sure I like this. Fill out a StringPiece instead? 52 bool RefData(const void** pptr, size_t l) { 53 if (length() < l) { 54 Advance(length()); // poison 55 return false; 56 } 57 58 *pptr = data(); 59 Advance(l); 60 return true; 61 } 62 63 // Copy data out of the buffer. 64 bool GetData(void* ptr, size_t l) { 65 const void* buf_ptr; 66 if (!RefData(&buf_ptr, l)) 67 return false; 68 69 memcpy(ptr, buf_ptr, l); 70 return true; 71 } 72 73 // Read a 32-bit integer in network byte order into a local uint32. 74 bool GetNet32(uint32* i) { 75 if (!GetData(i, sizeof(*i))) 76 return false; 77 78 *i = base::NetToHost32(*i); 79 return true; 80 } 81 82 // Returns false if there is no data, otherwise fills |*line| with a reference 83 // to the next line of data in the buffer. 84 bool GetLine(base::StringPiece* line) { 85 if (!length_) 86 return false; 87 88 // Find the end of the line, or the end of the input. 89 size_t eol = 0; 90 while (eol < length_ && data_[eol] != '\n') { 91 ++eol; 92 } 93 line->set(data_, eol); 94 Advance(eol); 95 96 // Skip the newline if present. 97 if (length_ && data_[0] == '\n') 98 Advance(1); 99 100 return true; 101 } 102 103 // Read out |c| colon-separated pieces from the next line. The resulting 104 // pieces point into the original data buffer. 105 bool GetPieces(size_t c, std::vector<base::StringPiece>* pieces) { 106 base::StringPiece line; 107 if (!GetLine(&line)) 108 return false; 109 110 // Find the parts separated by ':'. 111 while (pieces->size() + 1 < c) { 112 size_t colon_ofs = line.find(':'); 113 if (colon_ofs == base::StringPiece::npos) { 114 Advance(length_); 115 return false; 116 } 117 118 pieces->push_back(line.substr(0, colon_ofs)); 119 line.remove_prefix(colon_ofs + 1); 120 } 121 122 // The last piece runs to the end of the line. 123 pieces->push_back(line); 124 return true; 125 } 126 127 private: 128 const char* data_; 129 size_t length_; 130 131 DISALLOW_COPY_AND_ASSIGN(BufferReader); 132 }; 133 134 bool ParseGetHashMetadata(size_t hash_count, 135 BufferReader* reader, 136 std::vector<SBFullHashResult>* full_hashes) { 137 for (size_t i = 0; i < hash_count; ++i) { 138 base::StringPiece line; 139 if (!reader->GetLine(&line)) 140 return false; 141 142 size_t meta_data_len; 143 if (!base::StringToSizeT(line, &meta_data_len)) 144 return false; 145 146 const void* meta_data; 147 if (!reader->RefData(&meta_data, meta_data_len)) 148 return false; 149 150 if (full_hashes) { 151 (*full_hashes)[full_hashes->size() - hash_count + i].metadata.assign( 152 reinterpret_cast<const char*>(meta_data), meta_data_len); 153 } 154 } 155 return true; 156 } 157 158 } // namespace 159 160 namespace safe_browsing { 161 162 // BODY = CACHELIFETIME LF HASHENTRY* EOF 163 // CACHELIFETIME = DIGIT+ 164 // HASHENTRY = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF 165 // HASHDATA (METADATALEN LF METADATA)* 166 // HASHSIZE = DIGIT+ # Length of each full hash 167 // NUMRESPONSES = DIGIT+ # Number of full hashes in HASHDATA 168 // HASHDATA = <HASHSIZE*NUMRESPONSES number of unsigned bytes> 169 // METADATALEN = DIGIT+ 170 // METADATA = <METADATALEN number of unsigned bytes> 171 bool ParseGetHash(const char* chunk_data, 172 size_t chunk_len, 173 base::TimeDelta* cache_lifetime, 174 std::vector<SBFullHashResult>* full_hashes) { 175 full_hashes->clear(); 176 BufferReader reader(chunk_data, chunk_len); 177 178 // Parse out cache lifetime. 179 { 180 base::StringPiece line; 181 if (!reader.GetLine(&line)) 182 return false; 183 184 int64_t cache_lifetime_seconds; 185 if (!base::StringToInt64(line, &cache_lifetime_seconds)) 186 return false; 187 188 // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden, 189 // either. Maybe there should be a threshold involved. 190 if (cache_lifetime_seconds < 0) 191 return false; 192 193 *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds); 194 } 195 196 while (!reader.empty()) { 197 std::vector<base::StringPiece> cmd_parts; 198 if (!reader.GetPieces(3, &cmd_parts)) 199 return false; 200 201 SBFullHashResult full_hash; 202 full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]); 203 204 size_t hash_len; 205 if (!base::StringToSizeT(cmd_parts[1], &hash_len)) 206 return false; 207 208 // TODO(shess): Is this possible? If not, why the length present? 209 if (hash_len != sizeof(SBFullHash)) 210 return false; 211 212 // Metadata is indicated by an optional ":m" at the end of the line. 213 bool has_metadata = false; 214 base::StringPiece hash_count_string = cmd_parts[2]; 215 size_t optional_colon = hash_count_string.find(':', 0); 216 if (optional_colon != base::StringPiece::npos) { 217 if (hash_count_string.substr(optional_colon) != ":m") 218 return false; 219 has_metadata = true; 220 hash_count_string.remove_suffix(2); 221 } 222 223 size_t hash_count; 224 if (!base::StringToSizeT(hash_count_string, &hash_count)) 225 return false; 226 227 if (hash_len * hash_count > reader.length()) 228 return false; 229 230 // Ignore hash results from lists we don't recognize. 231 if (full_hash.list_id < 0) { 232 reader.Advance(hash_len * hash_count); 233 if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, NULL)) 234 return false; 235 continue; 236 } 237 238 for (size_t i = 0; i < hash_count; ++i) { 239 if (!reader.GetData(&full_hash.hash, hash_len)) 240 return false; 241 full_hashes->push_back(full_hash); 242 } 243 244 if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, full_hashes)) 245 return false; 246 } 247 248 return reader.empty(); 249 } 250 251 // BODY = HEADER LF PREFIXES EOF 252 // HEADER = PREFIXSIZE ":" LENGTH 253 // PREFIXSIZE = DIGIT+ # Size of each prefix in bytes 254 // LENGTH = DIGIT+ # Size of PREFIXES in bytes 255 std::string FormatGetHash(const std::vector<SBPrefix>& prefixes) { 256 std::string request; 257 request.append(base::Uint64ToString(sizeof(SBPrefix))); 258 request.append(":"); 259 request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size())); 260 request.append("\n"); 261 262 // SBPrefix values are read without concern for byte order, so write back the 263 // same way. 264 for (size_t i = 0; i < prefixes.size(); ++i) { 265 request.append(reinterpret_cast<const char*>(&prefixes[i]), 266 sizeof(SBPrefix)); 267 } 268 269 return request; 270 } 271 272 bool ParseUpdate(const char* chunk_data, 273 size_t chunk_len, 274 size_t* next_update_sec, 275 bool* reset, 276 std::vector<SBChunkDelete>* deletes, 277 std::vector<ChunkUrl>* chunk_urls) { 278 DCHECK(next_update_sec); 279 DCHECK(deletes); 280 DCHECK(chunk_urls); 281 282 BufferReader reader(chunk_data, chunk_len); 283 284 // Populated below. 285 std::string list_name; 286 287 while (!reader.empty()) { 288 std::vector<base::StringPiece> pieces; 289 if (!reader.GetPieces(2, &pieces)) 290 return false; 291 292 base::StringPiece& command = pieces[0]; 293 294 // Differentiate on the first character of the command (which is usually 295 // only one character, with the exception of the 'ad' and 'sd' commands). 296 switch (command[0]) { 297 case 'a': 298 case 's': { 299 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must 300 // have also parsed the list name before getting here, or the add-del 301 // or sub-del will have no context. 302 if (list_name.empty() || (command != "ad" && command != "sd")) 303 return false; 304 SBChunkDelete chunk_delete; 305 chunk_delete.is_sub_del = command[0] == 's'; 306 StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del); 307 chunk_delete.list_name = list_name; 308 deletes->push_back(chunk_delete); 309 break; 310 } 311 312 case 'i': 313 // The line providing the name of the list (i.e. 'goog-phish-shavar'). 314 list_name = pieces[1].as_string(); 315 break; 316 317 case 'n': 318 // The line providing the next earliest time (in seconds) to re-query. 319 if (!base::StringToSizeT(pieces[1], next_update_sec)) 320 return false; 321 break; 322 323 case 'u': { 324 ChunkUrl chunk_url; 325 chunk_url.url = pieces[1].as_string(); // Skip the initial "u:". 326 chunk_url.list_name = list_name; 327 chunk_urls->push_back(chunk_url); 328 break; 329 } 330 331 case 'r': 332 if (pieces[1] != "pleasereset") 333 return false; 334 *reset = true; 335 break; 336 337 default: 338 // According to the spec, we ignore commands we don't understand. 339 // TODO(shess): Does this apply to r:unknown or n:not-integer? 340 break; 341 } 342 } 343 344 return true; 345 } 346 347 // BODY = (UINT32 CHUNKDATA)+ 348 // UINT32 = Unsigned 32-bit integer in network byte order 349 // CHUNKDATA = Encoded ChunkData protocol message 350 bool ParseChunk(const char* data, 351 size_t length, 352 ScopedVector<SBChunkData>* chunks) { 353 BufferReader reader(data, length); 354 355 while (!reader.empty()) { 356 uint32 l = 0; 357 if (!reader.GetNet32(&l) || l == 0 || l > reader.length()) 358 return false; 359 360 const void* p = NULL; 361 if (!reader.RefData(&p, l)) 362 return false; 363 364 scoped_ptr<SBChunkData> chunk(new SBChunkData()); 365 if (!chunk->ParseFrom(reinterpret_cast<const unsigned char*>(p), l)) 366 return false; 367 368 chunks->push_back(chunk.release()); 369 } 370 371 DCHECK(reader.empty()); 372 return true; 373 } 374 375 // LIST = LISTNAME ";" LISTINFO (":" LISTINFO)* 376 // LISTINFO = CHUNKTYPE ":" CHUNKLIST 377 // CHUNKTYPE = "a" | "s" 378 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST] 379 // NUMBER = DIGIT+ 380 // RANGE = NUMBER "-" NUMBER 381 std::string FormatList(const SBListChunkRanges& list) { 382 std::string formatted_results = list.name; 383 formatted_results.append(";"); 384 385 if (!list.adds.empty()) 386 formatted_results.append("a:").append(list.adds); 387 if (!list.adds.empty() && !list.subs.empty()) 388 formatted_results.append(":"); 389 if (!list.subs.empty()) 390 formatted_results.append("s:").append(list.subs); 391 formatted_results.append("\n"); 392 393 return formatted_results; 394 } 395 396 } // namespace safe_browsing 397