1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/dns/dns_session.h" 6 7 #include "base/basictypes.h" 8 #include "base/bind.h" 9 #include "base/lazy_instance.h" 10 #include "base/metrics/histogram.h" 11 #include "base/metrics/sample_vector.h" 12 #include "base/rand_util.h" 13 #include "base/stl_util.h" 14 #include "base/time/time.h" 15 #include "net/base/ip_endpoint.h" 16 #include "net/base/net_errors.h" 17 #include "net/dns/dns_config_service.h" 18 #include "net/dns/dns_socket_pool.h" 19 #include "net/socket/stream_socket.h" 20 #include "net/udp/datagram_client_socket.h" 21 22 namespace net { 23 24 namespace { 25 // Never exceed max timeout. 26 const unsigned kMaxTimeoutMs = 5000; 27 // Set min timeout, in case we are talking to a local DNS proxy. 28 const unsigned kMinTimeoutMs = 10; 29 30 // Number of buckets in the histogram of observed RTTs. 31 const size_t kRTTBucketCount = 100; 32 // Target percentile in the RTT histogram used for retransmission timeout. 33 const unsigned kRTOPercentile = 99; 34 } // namespace 35 36 // Runtime statistics of DNS server. 37 struct DnsSession::ServerStats { 38 ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets) 39 : last_failure_count(0), rtt_estimate(rtt_estimate_param) { 40 rtt_histogram.reset(new base::SampleVector(buckets)); 41 // Seed histogram with 2 samples at |rtt_estimate| timeout. 42 rtt_histogram->Accumulate(rtt_estimate.InMilliseconds(), 2); 43 } 44 45 // Count of consecutive failures after last success. 46 int last_failure_count; 47 48 // Last time when server returned failure or timeout. 49 base::Time last_failure; 50 // Last time when server returned success. 51 base::Time last_success; 52 53 // Estimated RTT using moving average. 54 base::TimeDelta rtt_estimate; 55 // Estimated error in the above. 56 base::TimeDelta rtt_deviation; 57 58 // A histogram of observed RTT . 59 scoped_ptr<base::SampleVector> rtt_histogram; 60 61 DISALLOW_COPY_AND_ASSIGN(ServerStats); 62 }; 63 64 // static 65 base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ = 66 LAZY_INSTANCE_INITIALIZER; 67 68 DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) { 69 base::Histogram::InitializeBucketRanges(1, 5000, this); 70 } 71 72 DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session, 73 unsigned server_index, 74 scoped_ptr<DatagramClientSocket> socket) 75 : session_(session), server_index_(server_index), socket_(socket.Pass()) {} 76 77 DnsSession::SocketLease::~SocketLease() { 78 session_->FreeSocket(server_index_, socket_.Pass()); 79 } 80 81 DnsSession::DnsSession(const DnsConfig& config, 82 scoped_ptr<DnsSocketPool> socket_pool, 83 const RandIntCallback& rand_int_callback, 84 NetLog* net_log) 85 : config_(config), 86 socket_pool_(socket_pool.Pass()), 87 rand_callback_(base::Bind(rand_int_callback, 0, kuint16max)), 88 net_log_(net_log), 89 server_index_(0) { 90 socket_pool_->Initialize(&config_.nameservers, net_log); 91 UMA_HISTOGRAM_CUSTOM_COUNTS( 92 "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 10); 93 for (size_t i = 0; i < config_.nameservers.size(); ++i) { 94 server_stats_.push_back(new ServerStats(config_.timeout, 95 rtt_buckets_.Pointer())); 96 } 97 } 98 99 DnsSession::~DnsSession() { 100 RecordServerStats(); 101 } 102 103 int DnsSession::NextQueryId() const { return rand_callback_.Run(); } 104 105 unsigned DnsSession::NextFirstServerIndex() { 106 unsigned index = NextGoodServerIndex(server_index_); 107 if (config_.rotate) 108 server_index_ = (server_index_ + 1) % config_.nameservers.size(); 109 return index; 110 } 111 112 unsigned DnsSession::NextGoodServerIndex(unsigned server_index) { 113 unsigned index = server_index; 114 base::Time oldest_server_failure(base::Time::Now()); 115 unsigned oldest_server_failure_index = 0; 116 117 UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood", 118 server_stats_[server_index]->last_failure.is_null()); 119 120 do { 121 base::Time cur_server_failure = server_stats_[index]->last_failure; 122 // If number of failures on this server doesn't exceed number of allowed 123 // attempts, return its index. 124 if (server_stats_[server_index]->last_failure_count < config_.attempts) { 125 return index; 126 } 127 // Track oldest failed server. 128 if (cur_server_failure < oldest_server_failure) { 129 oldest_server_failure = cur_server_failure; 130 oldest_server_failure_index = index; 131 } 132 index = (index + 1) % config_.nameservers.size(); 133 } while (index != server_index); 134 135 // If we are here it means that there are no successful servers, so we have 136 // to use one that has failed oldest. 137 return oldest_server_failure_index; 138 } 139 140 void DnsSession::RecordServerFailure(unsigned server_index) { 141 UMA_HISTOGRAM_CUSTOM_COUNTS( 142 "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 10); 143 ++(server_stats_[server_index]->last_failure_count); 144 server_stats_[server_index]->last_failure = base::Time::Now(); 145 } 146 147 void DnsSession::RecordServerSuccess(unsigned server_index) { 148 if (server_stats_[server_index]->last_success.is_null()) { 149 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange", 150 server_stats_[server_index]->last_failure_count); 151 } else { 152 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess", 153 server_stats_[server_index]->last_failure_count); 154 } 155 server_stats_[server_index]->last_failure_count = 0; 156 server_stats_[server_index]->last_failure = base::Time(); 157 server_stats_[server_index]->last_success = base::Time::Now(); 158 } 159 160 void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) { 161 DCHECK_LT(server_index, server_stats_.size()); 162 163 // For measurement, assume it is the first attempt (no backoff). 164 base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0); 165 base::TimeDelta timeout_histogram = NextTimeoutFromHistogram(server_index, 0); 166 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt - timeout_jacobson); 167 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram", 168 rtt - timeout_histogram); 169 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder", 170 timeout_jacobson - rtt); 171 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder", 172 timeout_histogram - rtt); 173 174 // Jacobson/Karels algorithm for TCP. 175 // Using parameters: alpha = 1/8, delta = 1/4, beta = 4 176 base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate; 177 base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation; 178 base::TimeDelta current_error = rtt - estimate; 179 estimate += current_error / 8; // * alpha 180 base::TimeDelta abs_error = base::TimeDelta::FromInternalValue( 181 std::abs(current_error.ToInternalValue())); 182 deviation += (abs_error - deviation) / 4; // * delta 183 184 // Histogram-based method. 185 server_stats_[server_index]->rtt_histogram 186 ->Accumulate(rtt.InMilliseconds(), 1); 187 } 188 189 void DnsSession::RecordLostPacket(unsigned server_index, int attempt) { 190 base::TimeDelta timeout_jacobson = 191 NextTimeoutFromJacobson(server_index, attempt); 192 base::TimeDelta timeout_histogram = 193 NextTimeoutFromHistogram(server_index, attempt); 194 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson); 195 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram); 196 } 197 198 void DnsSession::RecordServerStats() { 199 for (size_t index = 0; index < server_stats_.size(); ++index) { 200 if (server_stats_[index]->last_failure_count) { 201 if (server_stats_[index]->last_success.is_null()) { 202 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess", 203 server_stats_[index]->last_failure_count); 204 } else { 205 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess", 206 server_stats_[index]->last_failure_count); 207 } 208 } 209 } 210 } 211 212 213 base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) { 214 // Respect config timeout if it exceeds |kMaxTimeoutMs|. 215 if (config_.timeout.InMilliseconds() >= kMaxTimeoutMs) 216 return config_.timeout; 217 return NextTimeoutFromHistogram(server_index, attempt); 218 } 219 220 // Allocate a socket, already connected to the server address. 221 scoped_ptr<DnsSession::SocketLease> DnsSession::AllocateSocket( 222 unsigned server_index, const NetLog::Source& source) { 223 scoped_ptr<DatagramClientSocket> socket; 224 225 socket = socket_pool_->AllocateSocket(server_index); 226 if (!socket.get()) 227 return scoped_ptr<SocketLease>(); 228 229 socket->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE, 230 source.ToEventParametersCallback()); 231 232 SocketLease* lease = new SocketLease(this, server_index, socket.Pass()); 233 return scoped_ptr<SocketLease>(lease); 234 } 235 236 scoped_ptr<StreamSocket> DnsSession::CreateTCPSocket( 237 unsigned server_index, const NetLog::Source& source) { 238 return socket_pool_->CreateTCPSocket(server_index, source); 239 } 240 241 // Release a socket. 242 void DnsSession::FreeSocket(unsigned server_index, 243 scoped_ptr<DatagramClientSocket> socket) { 244 DCHECK(socket.get()); 245 246 socket->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE); 247 248 socket_pool_->FreeSocket(server_index, socket.Pass()); 249 } 250 251 base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index, 252 int attempt) { 253 DCHECK_LT(server_index, server_stats_.size()); 254 255 base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate + 256 4 * server_stats_[server_index]->rtt_deviation; 257 258 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs)); 259 260 // The timeout doubles every full round. 261 unsigned num_backoffs = attempt / config_.nameservers.size(); 262 263 return std::min(timeout * (1 << num_backoffs), 264 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs)); 265 } 266 267 base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index, 268 int attempt) { 269 DCHECK_LT(server_index, server_stats_.size()); 270 271 COMPILE_ASSERT(std::numeric_limits<base::HistogramBase::Count>::is_signed, 272 histogram_base_count_assumed_to_be_signed); 273 274 // Use fixed percentile of observed samples. 275 const base::SampleVector& samples = 276 *server_stats_[server_index]->rtt_histogram; 277 278 base::HistogramBase::Count total = samples.TotalCount(); 279 base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100; 280 size_t index = 0; 281 while (remaining_count > 0 && index < rtt_buckets_.Get().size()) { 282 remaining_count -= samples.GetCountAtIndex(index); 283 ++index; 284 } 285 286 base::TimeDelta timeout = 287 base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index)); 288 289 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs)); 290 291 // The timeout still doubles every full round. 292 unsigned num_backoffs = attempt / config_.nameservers.size(); 293 294 return std::min(timeout * (1 << num_backoffs), 295 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs)); 296 } 297 298 } // namespace net 299