Home | History | Annotate | Download | only in dns
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/dns/dns_hosts.h"
      6 
      7 #include "base/file_util.h"
      8 #include "base/logging.h"
      9 #include "base/metrics/histogram.h"
     10 #include "base/strings/string_util.h"
     11 #include "base/strings/string_tokenizer.h"
     12 
     13 using base::StringPiece;
     14 
     15 namespace net {
     16 
     17 // Parses the contents of a hosts file.  Returns one token (IP or hostname) at
     18 // a time.  Doesn't copy anything; accepts the file as a StringPiece and
     19 // returns tokens as StringPieces.
     20 class HostsParser {
     21  public:
     22   explicit HostsParser(const StringPiece& text)
     23       : text_(text),
     24         data_(text.data()),
     25         end_(text.size()),
     26         pos_(0),
     27         token_(),
     28         token_is_ip_(false) {}
     29 
     30   // Advances to the next token (IP or hostname).  Returns whether another
     31   // token was available.  |token_is_ip| and |token| can be used to find out
     32   // the type and text of the token.
     33   bool Advance() {
     34     bool next_is_ip = (pos_ == 0);
     35     while (pos_ < end_ && pos_ != std::string::npos) {
     36       switch (text_[pos_]) {
     37         case ' ':
     38         case '\t':
     39           SkipWhitespace();
     40           break;
     41 
     42         case '\r':
     43         case '\n':
     44           next_is_ip = true;
     45           pos_++;
     46           break;
     47 
     48         case '#':
     49           SkipRestOfLine();
     50           break;
     51 
     52         default: {
     53           size_t token_start = pos_;
     54           SkipToken();
     55           size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
     56 
     57           token_ = StringPiece(data_ + token_start, token_end - token_start);
     58           token_is_ip_ = next_is_ip;
     59 
     60           return true;
     61         }
     62       }
     63     }
     64 
     65     text_ = StringPiece();
     66     return false;
     67   }
     68 
     69   // Fast-forwards the parser to the next line.  Should be called if an IP
     70   // address doesn't parse, to avoid wasting time tokenizing hostnames that
     71   // will be ignored.
     72   void SkipRestOfLine() {
     73     pos_ = text_.find("\n", pos_);
     74   }
     75 
     76   // Returns whether the last-parsed token is an IP address (true) or a
     77   // hostname (false).
     78   bool token_is_ip() { return token_is_ip_; }
     79 
     80   // Returns the text of the last-parsed token as a StringPiece referencing
     81   // the same underlying memory as the StringPiece passed to the constructor.
     82   // Returns an empty StringPiece if no token has been parsed or the end of
     83   // the input string has been reached.
     84   const StringPiece& token() { return token_; }
     85 
     86  private:
     87   void SkipToken() {
     88     pos_ = text_.find_first_of(" \t\n\r#", pos_);
     89   }
     90 
     91   void SkipWhitespace() {
     92     pos_ = text_.find_first_not_of(" \t", pos_);
     93   }
     94 
     95   StringPiece text_;
     96   const char* data_;
     97   const size_t end_;
     98 
     99   size_t pos_;
    100   StringPiece token_;
    101   bool token_is_ip_;
    102 
    103   DISALLOW_COPY_AND_ASSIGN(HostsParser);
    104 };
    105 
    106 
    107 
    108 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
    109   CHECK(dns_hosts);
    110   DnsHosts& hosts = *dns_hosts;
    111 
    112   StringPiece ip_text;
    113   IPAddressNumber ip;
    114   AddressFamily family = ADDRESS_FAMILY_IPV4;
    115   HostsParser parser(contents);
    116   while (parser.Advance()) {
    117     if (parser.token_is_ip()) {
    118       StringPiece new_ip_text = parser.token();
    119       // Some ad-blocking hosts files contain thousands of entries pointing to
    120       // the same IP address (usually 127.0.0.1).  Don't bother parsing the IP
    121       // again if it's the same as the one above it.
    122       if (new_ip_text != ip_text) {
    123         IPAddressNumber new_ip;
    124         if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
    125           ip_text = new_ip_text;
    126           ip.swap(new_ip);
    127           family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
    128         } else {
    129           parser.SkipRestOfLine();
    130         }
    131       }
    132     } else {
    133       DnsHostsKey key(parser.token().as_string(), family);
    134       StringToLowerASCII(&key.first);
    135       IPAddressNumber& mapped_ip = hosts[key];
    136       if (mapped_ip.empty())
    137         mapped_ip = ip;
    138       // else ignore this entry (first hit counts)
    139     }
    140   }
    141 }
    142 
    143 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
    144   dns_hosts->clear();
    145   // Missing file indicates empty HOSTS.
    146   if (!base::PathExists(path))
    147     return true;
    148 
    149   int64 size;
    150   if (!base::GetFileSize(path, &size))
    151     return false;
    152 
    153   UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
    154 
    155   // Reject HOSTS files larger than |kMaxHostsSize| bytes.
    156   const int64 kMaxHostsSize = 1 << 25;  // 32MB
    157   if (size > kMaxHostsSize)
    158     return false;
    159 
    160   std::string contents;
    161   if (!base::ReadFileToString(path, &contents))
    162     return false;
    163 
    164   ParseHosts(contents, dns_hosts);
    165   return true;
    166 }
    167 
    168 }  // namespace net
    169 
    170