1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/dns/dns_hosts.h" 6 7 #include "base/files/file_util.h" 8 #include "base/logging.h" 9 #include "base/metrics/histogram.h" 10 #include "base/strings/string_util.h" 11 12 using base::StringPiece; 13 14 namespace net { 15 16 namespace { 17 18 // Parses the contents of a hosts file. Returns one token (IP or hostname) at 19 // a time. Doesn't copy anything; accepts the file as a StringPiece and 20 // returns tokens as StringPieces. 21 class HostsParser { 22 public: 23 explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode) 24 : text_(text), 25 data_(text.data()), 26 end_(text.size()), 27 pos_(0), 28 token_is_ip_(false), 29 comma_mode_(comma_mode) {} 30 31 // Advances to the next token (IP or hostname). Returns whether another 32 // token was available. |token_is_ip| and |token| can be used to find out 33 // the type and text of the token. 34 bool Advance() { 35 bool next_is_ip = (pos_ == 0); 36 while (pos_ < end_ && pos_ != std::string::npos) { 37 switch (text_[pos_]) { 38 case ' ': 39 case '\t': 40 SkipWhitespace(); 41 break; 42 43 case '\r': 44 case '\n': 45 next_is_ip = true; 46 pos_++; 47 break; 48 49 case '#': 50 SkipRestOfLine(); 51 break; 52 53 case ',': 54 if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) { 55 SkipWhitespace(); 56 break; 57 } 58 59 // If comma_mode_ is COMMA_IS_TOKEN, fall through: 60 61 default: { 62 size_t token_start = pos_; 63 SkipToken(); 64 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; 65 66 token_ = StringPiece(data_ + token_start, token_end - token_start); 67 token_is_ip_ = next_is_ip; 68 69 return true; 70 } 71 } 72 } 73 74 return false; 75 } 76 77 // Fast-forwards the parser to the next line. Should be called if an IP 78 // address doesn't parse, to avoid wasting time tokenizing hostnames that 79 // will be ignored. 80 void SkipRestOfLine() { 81 pos_ = text_.find("\n", pos_); 82 } 83 84 // Returns whether the last-parsed token is an IP address (true) or a 85 // hostname (false). 86 bool token_is_ip() { return token_is_ip_; } 87 88 // Returns the text of the last-parsed token as a StringPiece referencing 89 // the same underlying memory as the StringPiece passed to the constructor. 90 // Returns an empty StringPiece if no token has been parsed or the end of 91 // the input string has been reached. 92 const StringPiece& token() { return token_; } 93 94 private: 95 void SkipToken() { 96 switch (comma_mode_) { 97 case PARSE_HOSTS_COMMA_IS_TOKEN: 98 pos_ = text_.find_first_of(" \t\n\r#", pos_); 99 break; 100 case PARSE_HOSTS_COMMA_IS_WHITESPACE: 101 pos_ = text_.find_first_of(" ,\t\n\r#", pos_); 102 break; 103 } 104 } 105 106 void SkipWhitespace() { 107 switch (comma_mode_) { 108 case PARSE_HOSTS_COMMA_IS_TOKEN: 109 pos_ = text_.find_first_not_of(" \t", pos_); 110 break; 111 case PARSE_HOSTS_COMMA_IS_WHITESPACE: 112 pos_ = text_.find_first_not_of(" ,\t", pos_); 113 break; 114 } 115 } 116 117 const StringPiece text_; 118 const char* data_; 119 const size_t end_; 120 121 size_t pos_; 122 StringPiece token_; 123 bool token_is_ip_; 124 125 const ParseHostsCommaMode comma_mode_; 126 127 DISALLOW_COPY_AND_ASSIGN(HostsParser); 128 }; 129 130 void ParseHostsWithCommaMode(const std::string& contents, 131 DnsHosts* dns_hosts, 132 ParseHostsCommaMode comma_mode) { 133 CHECK(dns_hosts); 134 DnsHosts& hosts = *dns_hosts; 135 136 StringPiece ip_text; 137 IPAddressNumber ip; 138 AddressFamily family = ADDRESS_FAMILY_IPV4; 139 HostsParser parser(contents, comma_mode); 140 while (parser.Advance()) { 141 if (parser.token_is_ip()) { 142 StringPiece new_ip_text = parser.token(); 143 // Some ad-blocking hosts files contain thousands of entries pointing to 144 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP 145 // again if it's the same as the one above it. 146 if (new_ip_text != ip_text) { 147 IPAddressNumber new_ip; 148 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { 149 ip_text = new_ip_text; 150 ip.swap(new_ip); 151 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; 152 } else { 153 parser.SkipRestOfLine(); 154 } 155 } 156 } else { 157 DnsHostsKey key(parser.token().as_string(), family); 158 base::StringToLowerASCII(&key.first); 159 IPAddressNumber& mapped_ip = hosts[key]; 160 if (mapped_ip.empty()) 161 mapped_ip = ip; 162 // else ignore this entry (first hit counts) 163 } 164 } 165 } 166 167 } // namespace 168 169 void ParseHostsWithCommaModeForTesting(const std::string& contents, 170 DnsHosts* dns_hosts, 171 ParseHostsCommaMode comma_mode) { 172 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); 173 } 174 175 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { 176 ParseHostsCommaMode comma_mode; 177 #if defined(OS_MACOSX) 178 // Mac OS X allows commas to separate hostnames. 179 comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE; 180 #else 181 // Linux allows commas in hostnames. 182 comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN; 183 #endif 184 185 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); 186 } 187 188 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { 189 dns_hosts->clear(); 190 // Missing file indicates empty HOSTS. 191 if (!base::PathExists(path)) 192 return true; 193 194 int64 size; 195 if (!base::GetFileSize(path, &size)) 196 return false; 197 198 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size); 199 200 // Reject HOSTS files larger than |kMaxHostsSize| bytes. 201 const int64 kMaxHostsSize = 1 << 25; // 32MB 202 if (size > kMaxHostsSize) 203 return false; 204 205 std::string contents; 206 if (!base::ReadFileToString(path, &contents)) 207 return false; 208 209 ParseHosts(contents, dns_hosts); 210 return true; 211 } 212 213 } // namespace net 214 215