Home | History | Annotate | Download | only in dns
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/dns/dns_session.h"
      6 
      7 #include "base/basictypes.h"
      8 #include "base/bind.h"
      9 #include "base/lazy_instance.h"
     10 #include "base/metrics/histogram.h"
     11 #include "base/metrics/sample_vector.h"
     12 #include "base/rand_util.h"
     13 #include "base/stl_util.h"
     14 #include "base/time/time.h"
     15 #include "net/base/ip_endpoint.h"
     16 #include "net/base/net_errors.h"
     17 #include "net/dns/dns_config_service.h"
     18 #include "net/dns/dns_socket_pool.h"
     19 #include "net/socket/stream_socket.h"
     20 #include "net/udp/datagram_client_socket.h"
     21 
     22 namespace net {
     23 
     24 namespace {
     25 // Never exceed max timeout.
     26 const unsigned kMaxTimeoutMs = 5000;
     27 // Set min timeout, in case we are talking to a local DNS proxy.
     28 const unsigned kMinTimeoutMs = 10;
     29 
     30 // Number of buckets in the histogram of observed RTTs.
     31 const size_t kRTTBucketCount = 100;
     32 // Target percentile in the RTT histogram used for retransmission timeout.
     33 const unsigned kRTOPercentile = 99;
     34 }  // namespace
     35 
     36 // Runtime statistics of DNS server.
     37 struct DnsSession::ServerStats {
     38   ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets)
     39     : last_failure_count(0), rtt_estimate(rtt_estimate_param) {
     40     rtt_histogram.reset(new base::SampleVector(buckets));
     41     // Seed histogram with 2 samples at |rtt_estimate| timeout.
     42     rtt_histogram->Accumulate(rtt_estimate.InMilliseconds(), 2);
     43   }
     44 
     45   // Count of consecutive failures after last success.
     46   int last_failure_count;
     47 
     48   // Last time when server returned failure or timeout.
     49   base::Time last_failure;
     50   // Last time when server returned success.
     51   base::Time last_success;
     52 
     53   // Estimated RTT using moving average.
     54   base::TimeDelta rtt_estimate;
     55   // Estimated error in the above.
     56   base::TimeDelta rtt_deviation;
     57 
     58   // A histogram of observed RTT .
     59   scoped_ptr<base::SampleVector> rtt_histogram;
     60 
     61   DISALLOW_COPY_AND_ASSIGN(ServerStats);
     62 };
     63 
     64 // static
     65 base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ =
     66     LAZY_INSTANCE_INITIALIZER;
     67 
     68 DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) {
     69   base::Histogram::InitializeBucketRanges(1, 5000, this);
     70 }
     71 
     72 DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session,
     73                                      unsigned server_index,
     74                                      scoped_ptr<DatagramClientSocket> socket)
     75     : session_(session), server_index_(server_index), socket_(socket.Pass()) {}
     76 
     77 DnsSession::SocketLease::~SocketLease() {
     78   session_->FreeSocket(server_index_, socket_.Pass());
     79 }
     80 
     81 DnsSession::DnsSession(const DnsConfig& config,
     82                        scoped_ptr<DnsSocketPool> socket_pool,
     83                        const RandIntCallback& rand_int_callback,
     84                        NetLog* net_log)
     85     : config_(config),
     86       socket_pool_(socket_pool.Pass()),
     87       rand_callback_(base::Bind(rand_int_callback, 0, kuint16max)),
     88       net_log_(net_log),
     89       server_index_(0) {
     90   socket_pool_->Initialize(&config_.nameservers, net_log);
     91   UMA_HISTOGRAM_CUSTOM_COUNTS(
     92       "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 11);
     93   for (size_t i = 0; i < config_.nameservers.size(); ++i) {
     94     server_stats_.push_back(new ServerStats(config_.timeout,
     95                                             rtt_buckets_.Pointer()));
     96   }
     97 }
     98 
     99 DnsSession::~DnsSession() {
    100   RecordServerStats();
    101 }
    102 
    103 int DnsSession::NextQueryId() const { return rand_callback_.Run(); }
    104 
    105 unsigned DnsSession::NextFirstServerIndex() {
    106   unsigned index = NextGoodServerIndex(server_index_);
    107   if (config_.rotate)
    108     server_index_ = (server_index_ + 1) % config_.nameservers.size();
    109   return index;
    110 }
    111 
    112 unsigned DnsSession::NextGoodServerIndex(unsigned server_index) {
    113   unsigned index = server_index;
    114   base::Time oldest_server_failure(base::Time::Now());
    115   unsigned oldest_server_failure_index = 0;
    116 
    117   UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood",
    118                         server_stats_[server_index]->last_failure.is_null());
    119 
    120   do {
    121     base::Time cur_server_failure = server_stats_[index]->last_failure;
    122     // If number of failures on this server doesn't exceed number of allowed
    123     // attempts, return its index.
    124     if (server_stats_[server_index]->last_failure_count < config_.attempts) {
    125       return index;
    126     }
    127     // Track oldest failed server.
    128     if (cur_server_failure < oldest_server_failure) {
    129       oldest_server_failure = cur_server_failure;
    130       oldest_server_failure_index = index;
    131     }
    132     index = (index + 1) % config_.nameservers.size();
    133   } while (index != server_index);
    134 
    135   // If we are here it means that there are no successful servers, so we have
    136   // to use one that has failed oldest.
    137   return oldest_server_failure_index;
    138 }
    139 
    140 void DnsSession::RecordServerFailure(unsigned server_index) {
    141   UMA_HISTOGRAM_CUSTOM_COUNTS(
    142       "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 11);
    143   ++(server_stats_[server_index]->last_failure_count);
    144   server_stats_[server_index]->last_failure = base::Time::Now();
    145 }
    146 
    147 void DnsSession::RecordServerSuccess(unsigned server_index) {
    148   if (server_stats_[server_index]->last_success.is_null()) {
    149     UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange",
    150                            server_stats_[server_index]->last_failure_count);
    151   } else {
    152     UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess",
    153                            server_stats_[server_index]->last_failure_count);
    154   }
    155   server_stats_[server_index]->last_failure_count = 0;
    156   server_stats_[server_index]->last_failure = base::Time();
    157   server_stats_[server_index]->last_success = base::Time::Now();
    158 }
    159 
    160 void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) {
    161   DCHECK_LT(server_index, server_stats_.size());
    162 
    163   // For measurement, assume it is the first attempt (no backoff).
    164   base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0);
    165   base::TimeDelta timeout_histogram = NextTimeoutFromHistogram(server_index, 0);
    166   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt - timeout_jacobson);
    167   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram",
    168                       rtt - timeout_histogram);
    169   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder",
    170                       timeout_jacobson - rtt);
    171   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder",
    172                       timeout_histogram - rtt);
    173 
    174   // Jacobson/Karels algorithm for TCP.
    175   // Using parameters: alpha = 1/8, delta = 1/4, beta = 4
    176   base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate;
    177   base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation;
    178   base::TimeDelta current_error = rtt - estimate;
    179   estimate += current_error / 8;  // * alpha
    180   base::TimeDelta abs_error = base::TimeDelta::FromInternalValue(
    181       std::abs(current_error.ToInternalValue()));
    182   deviation += (abs_error - deviation) / 4;  // * delta
    183 
    184   // Histogram-based method.
    185   server_stats_[server_index]->rtt_histogram
    186       ->Accumulate(rtt.InMilliseconds(), 1);
    187 }
    188 
    189 void DnsSession::RecordLostPacket(unsigned server_index, int attempt) {
    190   base::TimeDelta timeout_jacobson =
    191       NextTimeoutFromJacobson(server_index, attempt);
    192   base::TimeDelta timeout_histogram =
    193       NextTimeoutFromHistogram(server_index, attempt);
    194   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson);
    195   UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram);
    196 }
    197 
    198 void DnsSession::RecordServerStats() {
    199   for (size_t index = 0; index < server_stats_.size(); ++index) {
    200     if (server_stats_[index]->last_failure_count) {
    201       if (server_stats_[index]->last_success.is_null()) {
    202         UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess",
    203                              server_stats_[index]->last_failure_count);
    204       } else {
    205         UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess",
    206                              server_stats_[index]->last_failure_count);
    207       }
    208     }
    209   }
    210 }
    211 
    212 
    213 base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) {
    214   // Respect config timeout if it exceeds |kMaxTimeoutMs|.
    215   if (config_.timeout.InMilliseconds() >= kMaxTimeoutMs)
    216     return config_.timeout;
    217   return NextTimeoutFromHistogram(server_index, attempt);
    218 }
    219 
    220 // Allocate a socket, already connected to the server address.
    221 scoped_ptr<DnsSession::SocketLease> DnsSession::AllocateSocket(
    222     unsigned server_index, const NetLog::Source& source) {
    223   scoped_ptr<DatagramClientSocket> socket;
    224 
    225   socket = socket_pool_->AllocateSocket(server_index);
    226   if (!socket.get())
    227     return scoped_ptr<SocketLease>();
    228 
    229   socket->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE,
    230                               source.ToEventParametersCallback());
    231 
    232   SocketLease* lease = new SocketLease(this, server_index, socket.Pass());
    233   return scoped_ptr<SocketLease>(lease);
    234 }
    235 
    236 scoped_ptr<StreamSocket> DnsSession::CreateTCPSocket(
    237     unsigned server_index, const NetLog::Source& source) {
    238   return socket_pool_->CreateTCPSocket(server_index, source);
    239 }
    240 
    241 // Release a socket.
    242 void DnsSession::FreeSocket(unsigned server_index,
    243                             scoped_ptr<DatagramClientSocket> socket) {
    244   DCHECK(socket.get());
    245 
    246   socket->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE);
    247 
    248   socket_pool_->FreeSocket(server_index, socket.Pass());
    249 }
    250 
    251 base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index,
    252                                                     int attempt) {
    253   DCHECK_LT(server_index, server_stats_.size());
    254 
    255   base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate +
    256                             4 * server_stats_[server_index]->rtt_deviation;
    257 
    258   timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
    259 
    260   // The timeout doubles every full round.
    261   unsigned num_backoffs = attempt / config_.nameservers.size();
    262 
    263   return std::min(timeout * (1 << num_backoffs),
    264                   base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
    265 }
    266 
    267 base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index,
    268                                                      int attempt) {
    269   DCHECK_LT(server_index, server_stats_.size());
    270 
    271   COMPILE_ASSERT(std::numeric_limits<base::HistogramBase::Count>::is_signed,
    272                  histogram_base_count_assumed_to_be_signed);
    273 
    274   // Use fixed percentile of observed samples.
    275   const base::SampleVector& samples =
    276       *server_stats_[server_index]->rtt_histogram;
    277 
    278   base::HistogramBase::Count total = samples.TotalCount();
    279   base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100;
    280   size_t index = 0;
    281   while (remaining_count > 0 && index < rtt_buckets_.Get().size()) {
    282     remaining_count -= samples.GetCountAtIndex(index);
    283     ++index;
    284   }
    285 
    286   base::TimeDelta timeout =
    287       base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index));
    288 
    289   timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
    290 
    291   // The timeout still doubles every full round.
    292   unsigned num_backoffs = attempt / config_.nameservers.size();
    293 
    294   return std::min(timeout * (1 << num_backoffs),
    295                   base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
    296 }
    297 
    298 }  // namespace net
    299