Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // This utility program exists to process the False Start blacklist file into
      6 // a static hash table so that it can be efficiently queried by Chrome.
      7 
      8 #include <stdio.h>
      9 #include <stdlib.h>
     10 
     11 #include <set>
     12 #include <string>
     13 #include <vector>
     14 
     15 #include "base/basictypes.h"
     16 #include "net/base/ssl_false_start_blacklist.h"
     17 
     18 using net::SSLFalseStartBlacklist;
     19 
     20 static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets;
     21 
     22 static bool verbose = false;
     23 
     24 static int
     25 usage(const char* argv0) {
     26   fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0);
     27   return 1;
     28 }
     29 
     30 // StripWWWPrefix removes "www." from the beginning of any elements of the
     31 // vector.
     32 static void StripWWWPrefix(std::vector<std::string>* hosts) {
     33   static const char kPrefix[] = "www.";
     34   static const unsigned kPrefixLen = sizeof(kPrefix) - 1;
     35 
     36   for (size_t i = 0; i < hosts->size(); i++) {
     37     const std::string& h = (*hosts)[i];
     38     if (h.size() >= kPrefixLen &&
     39         memcmp(h.data(), kPrefix, kPrefixLen) == 0) {
     40       (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen);
     41     }
     42   }
     43 }
     44 
     45 // RemoveDuplicateEntries removes all duplicates from |hosts|.
     46 static void RemoveDuplicateEntries(std::vector<std::string>* hosts) {
     47   std::set<std::string> hosts_set;
     48   std::vector<std::string> ret;
     49 
     50   for (std::vector<std::string>::const_iterator
     51        i = hosts->begin(); i != hosts->end(); i++) {
     52     if (hosts_set.count(*i)) {
     53       if (verbose)
     54         fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str());
     55       continue;
     56     }
     57     hosts_set.insert(*i);
     58     ret.push_back(*i);
     59   }
     60 
     61   hosts->swap(ret);
     62 }
     63 
     64 // ParentDomain returns the parent domain for a given domain name or the empty
     65 // string if the name is a top-level domain.
     66 static std::string ParentDomain(const std::string& in) {
     67   for (size_t i = 0; i < in.size(); i++) {
     68     if (in[i] == '.') {
     69       return in.substr(i + 1, in.size() - i - 1);
     70     }
     71   }
     72 
     73   return std::string();
     74 }
     75 
     76 // RemoveRedundantEntries removes any entries which are subdomains of other
     77 // entries. (i.e. foo.example.com would be removed if example.com were also
     78 // included.)
     79 static void RemoveRedundantEntries(std::vector<std::string>* hosts) {
     80   std::set<std::string> hosts_set;
     81   std::vector<std::string> ret;
     82 
     83   for (std::vector<std::string>::const_iterator
     84        i = hosts->begin(); i != hosts->end(); i++) {
     85     hosts_set.insert(*i);
     86   }
     87 
     88   for (std::vector<std::string>::const_iterator
     89        i = hosts->begin(); i != hosts->end(); i++) {
     90     std::string parent = ParentDomain(*i);
     91     while (!parent.empty()) {
     92       if (hosts_set.count(parent))
     93         break;
     94       parent = ParentDomain(parent);
     95     }
     96     if (parent.empty()) {
     97       ret.push_back(*i);
     98     } else {
     99       if (verbose)
    100         fprintf(stderr, "Removing %s as redundant\n", i->c_str());
    101     }
    102   }
    103 
    104   hosts->swap(ret);
    105 }
    106 
    107 // CheckLengths returns true iff every host is less than 256 bytes long (not
    108 // including the terminating NUL) and contains two or more labels.
    109 static bool CheckLengths(const std::vector<std::string>& hosts) {
    110   for (std::vector<std::string>::const_iterator
    111        i = hosts.begin(); i != hosts.end(); i++) {
    112      if (i->size() >= 256) {
    113        fprintf(stderr, "Entry %s is too large\n", i->c_str());
    114        return false;
    115      }
    116      if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) {
    117        fprintf(stderr, "Entry %s contains too few labels\n", i->c_str());
    118        return false;
    119      }
    120   }
    121 
    122   return true;
    123 }
    124 
    125 int main(int argc, char** argv) {
    126   if (argc != 3)
    127     return usage(argv[0]);
    128 
    129   const char* input_file = argv[1];
    130   const char* output_file = argv[2];
    131   FILE* input = fopen(input_file, "rb");
    132   if (!input) {
    133     perror("open");
    134     return usage(argv[0]);
    135   }
    136 
    137   if (fseek(input, 0, SEEK_END)) {
    138     perror("fseek");
    139     return 1;
    140   }
    141 
    142   const long input_size = ftell(input);
    143 
    144   if (fseek(input, 0, SEEK_SET)) {
    145     perror("fseek");
    146     return 1;
    147   }
    148 
    149   char* buffer = static_cast<char*>(malloc(input_size));
    150   long done = 0;
    151   while (done < input_size) {
    152     size_t n = fread(buffer + done, 1, input_size - done, input);
    153     if (n == 0) {
    154       perror("fread");
    155       free(buffer);
    156       fclose(input);
    157       return 1;
    158     }
    159     done += n;
    160   }
    161   fclose(input);
    162 
    163   std::vector<std::string> hosts;
    164 
    165   off_t line_start = 0;
    166   bool is_comment = false;
    167   bool non_whitespace_seen = false;
    168   for (long i = 0; i <= input_size; i++) {
    169     if (i == input_size || buffer[i] == '\n') {
    170       if (!is_comment && non_whitespace_seen) {
    171         long len = i - line_start;
    172         if (i > 0 && buffer[i-1] == '\r')
    173           len--;
    174         hosts.push_back(std::string(&buffer[line_start], len));
    175       }
    176       is_comment = false;
    177       non_whitespace_seen = false;
    178       line_start = i + 1;
    179       continue;
    180     }
    181 
    182     if (i == line_start && buffer[i] == '#')
    183       is_comment = true;
    184     if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r')
    185       non_whitespace_seen = true;
    186   }
    187   free(buffer);
    188 
    189   fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size());
    190   StripWWWPrefix(&hosts);
    191   RemoveDuplicateEntries(&hosts);
    192   fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size());
    193   RemoveRedundantEntries(&hosts);
    194   fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size());
    195   if (!CheckLengths(hosts)) {
    196     fprintf(stderr, "One or more entries is too large or too small\n");
    197     return 2;
    198   }
    199 
    200   fprintf(stderr, "Using %d entry hash table\n", kBuckets);
    201   uint32 table[kBuckets];
    202   std::vector<std::string> buckets[kBuckets];
    203 
    204   for (std::vector<std::string>::const_iterator
    205        i = hosts.begin(); i != hosts.end(); i++) {
    206     const char* last_two_labels =
    207         SSLFalseStartBlacklist::LastTwoLabels(i->c_str());
    208     const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels);
    209     buckets[h & (kBuckets - 1)].push_back(*i);
    210   }
    211 
    212   std::string table_data;
    213   unsigned max_bucket_size = 0;
    214   for (unsigned i = 0; i < kBuckets; i++) {
    215     if (buckets[i].size() > max_bucket_size)
    216       max_bucket_size = buckets[i].size();
    217 
    218     table[i] = table_data.size();
    219     for (std::vector<std::string>::const_iterator
    220          j = buckets[i].begin(); j != buckets[i].end(); j++) {
    221       table_data.push_back((char) j->size());
    222       table_data.append(*j);
    223     }
    224   }
    225 
    226   fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size);
    227 
    228   FILE* out = fopen(output_file, "w+");
    229   if (!out) {
    230     perror("opening output file");
    231     return 4;
    232   }
    233 
    234   fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights "
    235           "reserved.\n// Use of this source code is governed by a BSD-style "
    236           "license that can be\n// found in the LICENSE file.\n\n");
    237   fprintf(out, "// WARNING: this code is generated by\n"
    238                "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n");
    239   fprintf(out, "#include \"base/basictypes.h\"\n\n");
    240   fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n");
    241   fprintf(out, "namespace net {\n\n");
    242   fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n",
    243           kBuckets);
    244   for (unsigned i = 0; i < kBuckets; i++) {
    245     fprintf(out, "  %u,\n", (unsigned) table[i]);
    246   }
    247   fprintf(out, "  %u,\n", (unsigned) table_data.size());
    248   fprintf(out, "};\n\n");
    249 
    250   fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n");
    251   for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) {
    252     if (line_length == 0)
    253       fprintf(out, "  ");
    254     uint8 c = static_cast<uint8>(table_data[i]);
    255     line_length += fprintf(out, "%d, ", c);
    256     if (i == table_data.size() - 1) {
    257       fprintf(out, "\n};\n");
    258     } else if (line_length >= 70) {
    259       fprintf(out, "\n");
    260       line_length = 0;
    261     }
    262   }
    263   fprintf(out, "\n}  // namespace net\n");
    264   fclose(out);
    265 
    266   return 0;
    267 }
    268