Home | History | Annotate | Download | only in dns
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/dns/dns_hosts.h"
      6 
      7 #include "base/files/file_util.h"
      8 #include "base/logging.h"
      9 #include "base/metrics/histogram.h"
     10 #include "base/strings/string_util.h"
     11 
     12 using base::StringPiece;
     13 
     14 namespace net {
     15 
     16 namespace {
     17 
     18 // Parses the contents of a hosts file.  Returns one token (IP or hostname) at
     19 // a time.  Doesn't copy anything; accepts the file as a StringPiece and
     20 // returns tokens as StringPieces.
     21 class HostsParser {
     22  public:
     23   explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode)
     24       : text_(text),
     25         data_(text.data()),
     26         end_(text.size()),
     27         pos_(0),
     28         token_is_ip_(false),
     29         comma_mode_(comma_mode) {}
     30 
     31   // Advances to the next token (IP or hostname).  Returns whether another
     32   // token was available.  |token_is_ip| and |token| can be used to find out
     33   // the type and text of the token.
     34   bool Advance() {
     35     bool next_is_ip = (pos_ == 0);
     36     while (pos_ < end_ && pos_ != std::string::npos) {
     37       switch (text_[pos_]) {
     38         case ' ':
     39         case '\t':
     40           SkipWhitespace();
     41           break;
     42 
     43         case '\r':
     44         case '\n':
     45           next_is_ip = true;
     46           pos_++;
     47           break;
     48 
     49         case '#':
     50           SkipRestOfLine();
     51           break;
     52 
     53         case ',':
     54           if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) {
     55             SkipWhitespace();
     56             break;
     57           }
     58 
     59           // If comma_mode_ is COMMA_IS_TOKEN, fall through:
     60 
     61         default: {
     62           size_t token_start = pos_;
     63           SkipToken();
     64           size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
     65 
     66           token_ = StringPiece(data_ + token_start, token_end - token_start);
     67           token_is_ip_ = next_is_ip;
     68 
     69           return true;
     70         }
     71       }
     72     }
     73 
     74     return false;
     75   }
     76 
     77   // Fast-forwards the parser to the next line.  Should be called if an IP
     78   // address doesn't parse, to avoid wasting time tokenizing hostnames that
     79   // will be ignored.
     80   void SkipRestOfLine() {
     81     pos_ = text_.find("\n", pos_);
     82   }
     83 
     84   // Returns whether the last-parsed token is an IP address (true) or a
     85   // hostname (false).
     86   bool token_is_ip() { return token_is_ip_; }
     87 
     88   // Returns the text of the last-parsed token as a StringPiece referencing
     89   // the same underlying memory as the StringPiece passed to the constructor.
     90   // Returns an empty StringPiece if no token has been parsed or the end of
     91   // the input string has been reached.
     92   const StringPiece& token() { return token_; }
     93 
     94  private:
     95   void SkipToken() {
     96     switch (comma_mode_) {
     97       case PARSE_HOSTS_COMMA_IS_TOKEN:
     98         pos_ = text_.find_first_of(" \t\n\r#", pos_);
     99         break;
    100       case PARSE_HOSTS_COMMA_IS_WHITESPACE:
    101         pos_ = text_.find_first_of(" ,\t\n\r#", pos_);
    102         break;
    103     }
    104   }
    105 
    106   void SkipWhitespace() {
    107     switch (comma_mode_) {
    108       case PARSE_HOSTS_COMMA_IS_TOKEN:
    109         pos_ = text_.find_first_not_of(" \t", pos_);
    110         break;
    111       case PARSE_HOSTS_COMMA_IS_WHITESPACE:
    112         pos_ = text_.find_first_not_of(" ,\t", pos_);
    113         break;
    114     }
    115   }
    116 
    117   const StringPiece text_;
    118   const char* data_;
    119   const size_t end_;
    120 
    121   size_t pos_;
    122   StringPiece token_;
    123   bool token_is_ip_;
    124 
    125   const ParseHostsCommaMode comma_mode_;
    126 
    127   DISALLOW_COPY_AND_ASSIGN(HostsParser);
    128 };
    129 
    130 void ParseHostsWithCommaMode(const std::string& contents,
    131                              DnsHosts* dns_hosts,
    132                              ParseHostsCommaMode comma_mode) {
    133   CHECK(dns_hosts);
    134   DnsHosts& hosts = *dns_hosts;
    135 
    136   StringPiece ip_text;
    137   IPAddressNumber ip;
    138   AddressFamily family = ADDRESS_FAMILY_IPV4;
    139   HostsParser parser(contents, comma_mode);
    140   while (parser.Advance()) {
    141     if (parser.token_is_ip()) {
    142       StringPiece new_ip_text = parser.token();
    143       // Some ad-blocking hosts files contain thousands of entries pointing to
    144       // the same IP address (usually 127.0.0.1).  Don't bother parsing the IP
    145       // again if it's the same as the one above it.
    146       if (new_ip_text != ip_text) {
    147         IPAddressNumber new_ip;
    148         if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
    149           ip_text = new_ip_text;
    150           ip.swap(new_ip);
    151           family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
    152         } else {
    153           parser.SkipRestOfLine();
    154         }
    155       }
    156     } else {
    157       DnsHostsKey key(parser.token().as_string(), family);
    158       base::StringToLowerASCII(&key.first);
    159       IPAddressNumber& mapped_ip = hosts[key];
    160       if (mapped_ip.empty())
    161         mapped_ip = ip;
    162       // else ignore this entry (first hit counts)
    163     }
    164   }
    165 }
    166 
    167 }  // namespace
    168 
    169 void ParseHostsWithCommaModeForTesting(const std::string& contents,
    170                                        DnsHosts* dns_hosts,
    171                                        ParseHostsCommaMode comma_mode) {
    172   ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
    173 }
    174 
    175 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
    176   ParseHostsCommaMode comma_mode;
    177 #if defined(OS_MACOSX)
    178   // Mac OS X allows commas to separate hostnames.
    179   comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE;
    180 #else
    181   // Linux allows commas in hostnames.
    182   comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN;
    183 #endif
    184 
    185   ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
    186 }
    187 
    188 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
    189   dns_hosts->clear();
    190   // Missing file indicates empty HOSTS.
    191   if (!base::PathExists(path))
    192     return true;
    193 
    194   int64 size;
    195   if (!base::GetFileSize(path, &size))
    196     return false;
    197 
    198   UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
    199 
    200   // Reject HOSTS files larger than |kMaxHostsSize| bytes.
    201   const int64 kMaxHostsSize = 1 << 25;  // 32MB
    202   if (size > kMaxHostsSize)
    203     return false;
    204 
    205   std::string contents;
    206   if (!base::ReadFileToString(path, &contents))
    207     return false;
    208 
    209   ParseHosts(contents, dns_hosts);
    210   return true;
    211 }
    212 
    213 }  // namespace net
    214 
    215