1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // This utility program exists to process the False Start blacklist file into 6 // a static hash table so that it can be efficiently queried by Chrome. 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 11 #include <set> 12 #include <string> 13 #include <vector> 14 15 #include "base/basictypes.h" 16 #include "net/base/ssl_false_start_blacklist.h" 17 18 using net::SSLFalseStartBlacklist; 19 20 static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets; 21 22 static bool verbose = false; 23 24 static int 25 usage(const char* argv0) { 26 fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0); 27 return 1; 28 } 29 30 // StripWWWPrefix removes "www." from the beginning of any elements of the 31 // vector. 32 static void StripWWWPrefix(std::vector<std::string>* hosts) { 33 static const char kPrefix[] = "www."; 34 static const unsigned kPrefixLen = sizeof(kPrefix) - 1; 35 36 for (size_t i = 0; i < hosts->size(); i++) { 37 const std::string& h = (*hosts)[i]; 38 if (h.size() >= kPrefixLen && 39 memcmp(h.data(), kPrefix, kPrefixLen) == 0) { 40 (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen); 41 } 42 } 43 } 44 45 // RemoveDuplicateEntries removes all duplicates from |hosts|. 46 static void RemoveDuplicateEntries(std::vector<std::string>* hosts) { 47 std::set<std::string> hosts_set; 48 std::vector<std::string> ret; 49 50 for (std::vector<std::string>::const_iterator 51 i = hosts->begin(); i != hosts->end(); i++) { 52 if (hosts_set.count(*i)) { 53 if (verbose) 54 fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str()); 55 continue; 56 } 57 hosts_set.insert(*i); 58 ret.push_back(*i); 59 } 60 61 hosts->swap(ret); 62 } 63 64 // ParentDomain returns the parent domain for a given domain name or the empty 65 // string if the name is a top-level domain. 66 static std::string ParentDomain(const std::string& in) { 67 for (size_t i = 0; i < in.size(); i++) { 68 if (in[i] == '.') { 69 return in.substr(i + 1, in.size() - i - 1); 70 } 71 } 72 73 return std::string(); 74 } 75 76 // RemoveRedundantEntries removes any entries which are subdomains of other 77 // entries. (i.e. foo.example.com would be removed if example.com were also 78 // included.) 79 static void RemoveRedundantEntries(std::vector<std::string>* hosts) { 80 std::set<std::string> hosts_set; 81 std::vector<std::string> ret; 82 83 for (std::vector<std::string>::const_iterator 84 i = hosts->begin(); i != hosts->end(); i++) { 85 hosts_set.insert(*i); 86 } 87 88 for (std::vector<std::string>::const_iterator 89 i = hosts->begin(); i != hosts->end(); i++) { 90 std::string parent = ParentDomain(*i); 91 while (!parent.empty()) { 92 if (hosts_set.count(parent)) 93 break; 94 parent = ParentDomain(parent); 95 } 96 if (parent.empty()) { 97 ret.push_back(*i); 98 } else { 99 if (verbose) 100 fprintf(stderr, "Removing %s as redundant\n", i->c_str()); 101 } 102 } 103 104 hosts->swap(ret); 105 } 106 107 // CheckLengths returns true iff every host is less than 256 bytes long (not 108 // including the terminating NUL) and contains two or more labels. 109 static bool CheckLengths(const std::vector<std::string>& hosts) { 110 for (std::vector<std::string>::const_iterator 111 i = hosts.begin(); i != hosts.end(); i++) { 112 if (i->size() >= 256) { 113 fprintf(stderr, "Entry %s is too large\n", i->c_str()); 114 return false; 115 } 116 if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) { 117 fprintf(stderr, "Entry %s contains too few labels\n", i->c_str()); 118 return false; 119 } 120 } 121 122 return true; 123 } 124 125 int main(int argc, char** argv) { 126 if (argc != 3) 127 return usage(argv[0]); 128 129 const char* input_file = argv[1]; 130 const char* output_file = argv[2]; 131 FILE* input = fopen(input_file, "rb"); 132 if (!input) { 133 perror("open"); 134 return usage(argv[0]); 135 } 136 137 if (fseek(input, 0, SEEK_END)) { 138 perror("fseek"); 139 return 1; 140 } 141 142 const long input_size = ftell(input); 143 144 if (fseek(input, 0, SEEK_SET)) { 145 perror("fseek"); 146 return 1; 147 } 148 149 char* buffer = static_cast<char*>(malloc(input_size)); 150 long done = 0; 151 while (done < input_size) { 152 size_t n = fread(buffer + done, 1, input_size - done, input); 153 if (n == 0) { 154 perror("fread"); 155 free(buffer); 156 fclose(input); 157 return 1; 158 } 159 done += n; 160 } 161 fclose(input); 162 163 std::vector<std::string> hosts; 164 165 off_t line_start = 0; 166 bool is_comment = false; 167 bool non_whitespace_seen = false; 168 for (long i = 0; i <= input_size; i++) { 169 if (i == input_size || buffer[i] == '\n') { 170 if (!is_comment && non_whitespace_seen) { 171 long len = i - line_start; 172 if (i > 0 && buffer[i-1] == '\r') 173 len--; 174 hosts.push_back(std::string(&buffer[line_start], len)); 175 } 176 is_comment = false; 177 non_whitespace_seen = false; 178 line_start = i + 1; 179 continue; 180 } 181 182 if (i == line_start && buffer[i] == '#') 183 is_comment = true; 184 if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r') 185 non_whitespace_seen = true; 186 } 187 free(buffer); 188 189 fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size()); 190 StripWWWPrefix(&hosts); 191 RemoveDuplicateEntries(&hosts); 192 fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size()); 193 RemoveRedundantEntries(&hosts); 194 fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size()); 195 if (!CheckLengths(hosts)) { 196 fprintf(stderr, "One or more entries is too large or too small\n"); 197 return 2; 198 } 199 200 fprintf(stderr, "Using %d entry hash table\n", kBuckets); 201 uint32 table[kBuckets]; 202 std::vector<std::string> buckets[kBuckets]; 203 204 for (std::vector<std::string>::const_iterator 205 i = hosts.begin(); i != hosts.end(); i++) { 206 const char* last_two_labels = 207 SSLFalseStartBlacklist::LastTwoLabels(i->c_str()); 208 const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels); 209 buckets[h & (kBuckets - 1)].push_back(*i); 210 } 211 212 std::string table_data; 213 unsigned max_bucket_size = 0; 214 for (unsigned i = 0; i < kBuckets; i++) { 215 if (buckets[i].size() > max_bucket_size) 216 max_bucket_size = buckets[i].size(); 217 218 table[i] = table_data.size(); 219 for (std::vector<std::string>::const_iterator 220 j = buckets[i].begin(); j != buckets[i].end(); j++) { 221 table_data.push_back((char) j->size()); 222 table_data.append(*j); 223 } 224 } 225 226 fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size); 227 228 FILE* out = fopen(output_file, "w+"); 229 if (!out) { 230 perror("opening output file"); 231 return 4; 232 } 233 234 fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights " 235 "reserved.\n// Use of this source code is governed by a BSD-style " 236 "license that can be\n// found in the LICENSE file.\n\n"); 237 fprintf(out, "// WARNING: this code is generated by\n" 238 "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n"); 239 fprintf(out, "#include \"base/basictypes.h\"\n\n"); 240 fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n"); 241 fprintf(out, "namespace net {\n\n"); 242 fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n", 243 kBuckets); 244 for (unsigned i = 0; i < kBuckets; i++) { 245 fprintf(out, " %u,\n", (unsigned) table[i]); 246 } 247 fprintf(out, " %u,\n", (unsigned) table_data.size()); 248 fprintf(out, "};\n\n"); 249 250 fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n"); 251 for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) { 252 if (line_length == 0) 253 fprintf(out, " "); 254 uint8 c = static_cast<uint8>(table_data[i]); 255 line_length += fprintf(out, "%d, ", c); 256 if (i == table_data.size() - 1) { 257 fprintf(out, "\n};\n"); 258 } else if (line_length >= 70) { 259 fprintf(out, "\n"); 260 line_length = 0; 261 } 262 } 263 fprintf(out, "\n} // namespace net\n"); 264 fclose(out); 265 266 return 0; 267 } 268