1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/dns/dns_hosts.h" 6 7 #include "base/file_util.h" 8 #include "base/logging.h" 9 #include "base/metrics/histogram.h" 10 #include "base/strings/string_util.h" 11 #include "base/strings/string_tokenizer.h" 12 13 using base::StringPiece; 14 15 namespace net { 16 17 // Parses the contents of a hosts file. Returns one token (IP or hostname) at 18 // a time. Doesn't copy anything; accepts the file as a StringPiece and 19 // returns tokens as StringPieces. 20 class HostsParser { 21 public: 22 explicit HostsParser(const StringPiece& text) 23 : text_(text), 24 data_(text.data()), 25 end_(text.size()), 26 pos_(0), 27 token_(), 28 token_is_ip_(false) {} 29 30 // Advances to the next token (IP or hostname). Returns whether another 31 // token was available. |token_is_ip| and |token| can be used to find out 32 // the type and text of the token. 33 bool Advance() { 34 bool next_is_ip = (pos_ == 0); 35 while (pos_ < end_ && pos_ != std::string::npos) { 36 switch (text_[pos_]) { 37 case ' ': 38 case '\t': 39 SkipWhitespace(); 40 break; 41 42 case '\r': 43 case '\n': 44 next_is_ip = true; 45 pos_++; 46 break; 47 48 case '#': 49 SkipRestOfLine(); 50 break; 51 52 default: { 53 size_t token_start = pos_; 54 SkipToken(); 55 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; 56 57 token_ = StringPiece(data_ + token_start, token_end - token_start); 58 token_is_ip_ = next_is_ip; 59 60 return true; 61 } 62 } 63 } 64 65 text_ = StringPiece(); 66 return false; 67 } 68 69 // Fast-forwards the parser to the next line. Should be called if an IP 70 // address doesn't parse, to avoid wasting time tokenizing hostnames that 71 // will be ignored. 72 void SkipRestOfLine() { 73 pos_ = text_.find("\n", pos_); 74 } 75 76 // Returns whether the last-parsed token is an IP address (true) or a 77 // hostname (false). 78 bool token_is_ip() { return token_is_ip_; } 79 80 // Returns the text of the last-parsed token as a StringPiece referencing 81 // the same underlying memory as the StringPiece passed to the constructor. 82 // Returns an empty StringPiece if no token has been parsed or the end of 83 // the input string has been reached. 84 const StringPiece& token() { return token_; } 85 86 private: 87 void SkipToken() { 88 pos_ = text_.find_first_of(" \t\n\r#", pos_); 89 } 90 91 void SkipWhitespace() { 92 pos_ = text_.find_first_not_of(" \t", pos_); 93 } 94 95 StringPiece text_; 96 const char* data_; 97 const size_t end_; 98 99 size_t pos_; 100 StringPiece token_; 101 bool token_is_ip_; 102 103 DISALLOW_COPY_AND_ASSIGN(HostsParser); 104 }; 105 106 107 108 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { 109 CHECK(dns_hosts); 110 DnsHosts& hosts = *dns_hosts; 111 112 StringPiece ip_text; 113 IPAddressNumber ip; 114 AddressFamily family = ADDRESS_FAMILY_IPV4; 115 HostsParser parser(contents); 116 while (parser.Advance()) { 117 if (parser.token_is_ip()) { 118 StringPiece new_ip_text = parser.token(); 119 // Some ad-blocking hosts files contain thousands of entries pointing to 120 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP 121 // again if it's the same as the one above it. 122 if (new_ip_text != ip_text) { 123 IPAddressNumber new_ip; 124 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { 125 ip_text = new_ip_text; 126 ip.swap(new_ip); 127 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; 128 } else { 129 parser.SkipRestOfLine(); 130 } 131 } 132 } else { 133 DnsHostsKey key(parser.token().as_string(), family); 134 StringToLowerASCII(&key.first); 135 IPAddressNumber& mapped_ip = hosts[key]; 136 if (mapped_ip.empty()) 137 mapped_ip = ip; 138 // else ignore this entry (first hit counts) 139 } 140 } 141 } 142 143 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { 144 dns_hosts->clear(); 145 // Missing file indicates empty HOSTS. 146 if (!base::PathExists(path)) 147 return true; 148 149 int64 size; 150 if (!base::GetFileSize(path, &size)) 151 return false; 152 153 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size); 154 155 // Reject HOSTS files larger than |kMaxHostsSize| bytes. 156 const int64 kMaxHostsSize = 1 << 25; // 32MB 157 if (size > kMaxHostsSize) 158 return false; 159 160 std::string contents; 161 if (!base::ReadFileToString(path, &contents)) 162 return false; 163 164 ParseHosts(contents, dns_hosts); 165 return true; 166 } 167 168 } // namespace net 169 170