Home | History | Annotate | Download | only in shill
      1 //
      2 // Copyright (C) 2013 The Android Open Source Project
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 
     17 #include "shill/traffic_monitor.h"
     18 
     19 #include <base/bind.h>
     20 #include <base/strings/stringprintf.h>
     21 #include <netinet/in.h>
     22 
     23 #include "shill/device.h"
     24 #include "shill/device_info.h"
     25 #include "shill/event_dispatcher.h"
     26 #include "shill/logging.h"
     27 #include "shill/socket_info_reader.h"
     28 
     29 using base::StringPrintf;
     30 using std::string;
     31 using std::vector;
     32 
     33 namespace shill {
     34 
// Per-file logging helpers. NOTE(review): these names are presumably picked
// up by the SLOG() macro from shill/logging.h (not visible here) -- confirm
// before renaming either of them.
namespace Logging {
// Log scope used for messages emitted from this file.
static auto kModuleLogScope = ScopeLogger::kLink;
// Returns the link name of |d| as the identifier for log output. |d| is
// dereferenced unconditionally and therefore must not be null.
static string ObjectID(Device* d) { return d->link_name(); }
}
     39 
// static
// Destination port a tracked connection must have for IsDnsFailing() to
// treat it as a DNS query.
const uint16_t TrafficMonitor::kDnsPort = 53;
// Inclusive upper bound, in seconds, of the time-to-expire window in which
// IsDnsFailing() counts an unreplied DNS connection entry as timed out.
const int64_t TrafficMonitor::kDnsTimedOutThresholdSeconds = 15;
// Value an accumulated sample counter must equal for SampleTraffic() to
// invoke the network problem callback.
const int TrafficMonitor::kMinimumFailedSamplesToTrigger = 2;
// Delay, in milliseconds, used when posting the next SampleTraffic() run.
// IsDnsFailing() also derives the lower bound of its time window from it.
const int64_t TrafficMonitor::kSamplingIntervalMilliseconds = 5000;
     45 
// Creates a monitor for |device| that schedules its sampling on |dispatcher|.
// Fresh SocketInfoReader and ConnectionInfoReader instances are allocated
// here and both failure counters start at zero. Sampling does not begin until
// Start() is called.
// NOTE(review): |dispatcher| is stored as a raw pointer, so it presumably has
// to outlive this object -- confirm against the owner.
TrafficMonitor::TrafficMonitor(const DeviceRefPtr& device,
                               EventDispatcher* dispatcher)
    : device_(device),
      dispatcher_(dispatcher),
      socket_info_reader_(new SocketInfoReader),
      accummulated_congested_tx_queues_samples_(0),
      connection_info_reader_(new ConnectionInfoReader),
      accummulated_dns_failures_samples_(0) {
}
     55 
// Stops monitoring on destruction. Start() binds the sampling callback to
// |this| with base::Unretained, so the pending callback is cancelled here
// rather than left queued against a destroyed object.
TrafficMonitor::~TrafficMonitor() {
  Stop();
}
     59 
     60 void TrafficMonitor::Start() {
     61   SLOG(device_.get(), 2) << __func__;
     62   Stop();
     63 
     64   sample_traffic_callback_.Reset(base::Bind(&TrafficMonitor::SampleTraffic,
     65                                             base::Unretained(this)));
     66   dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(),
     67                                kSamplingIntervalMilliseconds);
     68 }
     69 
// Stops periodic sampling: cancels the pending SampleTraffic() callback and
// zeroes both the congested-tx-queue and DNS-failure sample counters. Also
// called from Start() and from the destructor.
void TrafficMonitor::Stop() {
  SLOG(device_.get(), 2) << __func__;
  sample_traffic_callback_.Cancel();
  ResetCongestedTxQueuesStats();
  ResetDnsFailingStats();
}
     76 
// Zeroes the running count of samples in which IsCongestedTxQueues()
// detected congestion.
void TrafficMonitor::ResetCongestedTxQueuesStats() {
  accummulated_congested_tx_queues_samples_ = 0;
}
     80 
// Same as ResetCongestedTxQueuesStats(), but first logs that the tx-queues
// are considered decongested.
void TrafficMonitor::ResetCongestedTxQueuesStatsWithLogging() {
  SLOG(device_.get(), 2) << __func__ << ": Tx-queues decongested";
  ResetCongestedTxQueuesStats();
}
     85 
     86 void TrafficMonitor::BuildIPPortToTxQueueLength(
     87     const vector<SocketInfo>& socket_infos,
     88     IPPortToTxQueueLengthMap* tx_queue_lengths) {
     89   SLOG(device_.get(), 3) << __func__;
     90   string device_ip_address = device_->ipconfig()->properties().address;
     91   for (const auto& info : socket_infos) {
     92     SLOG(device_.get(), 4) << "SocketInfo(IP="
     93                            << info.local_ip_address().ToString()
     94                            << ", TX=" << info.transmit_queue_value()
     95                            << ", State=" << info.connection_state()
     96                            << ", TimerState=" << info.timer_state();
     97     if (info.local_ip_address().ToString() != device_ip_address ||
     98         info.transmit_queue_value() == 0 ||
     99         info.connection_state() != SocketInfo::kConnectionStateEstablished ||
    100         (info.timer_state() != SocketInfo::kTimerStateRetransmitTimerPending &&
    101          info.timer_state() !=
    102             SocketInfo::kTimerStateZeroWindowProbeTimerPending)) {
    103       SLOG(device_.get(), 4) << "Connection Filtered.";
    104       continue;
    105     }
    106     SLOG(device_.get(), 3) << "Monitoring connection: TX="
    107                            << info.transmit_queue_value()
    108                            << " TimerState=" << info.timer_state();
    109 
    110     string local_ip_port =
    111         StringPrintf("%s:%d",
    112                      info.local_ip_address().ToString().c_str(),
    113                      info.local_port());
    114     (*tx_queue_lengths)[local_ip_port] = info.transmit_queue_value();
    115   }
    116 }
    117 
    118 bool TrafficMonitor::IsCongestedTxQueues() {
    119   SLOG(device_.get(), 4) << __func__;
    120   vector<SocketInfo> socket_infos;
    121   if (!socket_info_reader_->LoadTcpSocketInfo(&socket_infos) ||
    122       socket_infos.empty()) {
    123     SLOG(device_.get(), 3) << __func__ << ": Empty socket info";
    124     ResetCongestedTxQueuesStatsWithLogging();
    125     return false;
    126   }
    127   bool congested_tx_queues = true;
    128   IPPortToTxQueueLengthMap curr_tx_queue_lengths;
    129   BuildIPPortToTxQueueLength(socket_infos, &curr_tx_queue_lengths);
    130   if (curr_tx_queue_lengths.empty()) {
    131     SLOG(device_.get(), 3) << __func__ << ": No interesting socket info";
    132     ResetCongestedTxQueuesStatsWithLogging();
    133   } else {
    134     for (const auto& length_entry : old_tx_queue_lengths_) {
    135       IPPortToTxQueueLengthMap::iterator curr_tx_queue_it =
    136           curr_tx_queue_lengths.find(length_entry.first);
    137       if (curr_tx_queue_it == curr_tx_queue_lengths.end() ||
    138           curr_tx_queue_it->second < length_entry.second) {
    139         congested_tx_queues = false;
    140         // TODO(armansito): If we had a false positive earlier, we may
    141         // want to correct it here by invoking a "connection back to normal
    142         // callback", so that the OutOfCredits property can be set to
    143         // false.
    144         break;
    145       }
    146     }
    147     if (congested_tx_queues) {
    148       ++accummulated_congested_tx_queues_samples_;
    149       SLOG(device_.get(), 2) << __func__
    150                              << ": Congested tx-queues detected ("
    151                              << accummulated_congested_tx_queues_samples_
    152                              << ")";
    153     }
    154   }
    155   old_tx_queue_lengths_ = curr_tx_queue_lengths;
    156 
    157   return congested_tx_queues;
    158 }
    159 
// Zeroes the running count of samples in which IsDnsFailing() detected a
// timed-out DNS query.
void TrafficMonitor::ResetDnsFailingStats() {
  accummulated_dns_failures_samples_ = 0;
}
    163 
// Same as ResetDnsFailingStats(), but first logs that DNS queries are
// considered restored.
void TrafficMonitor::ResetDnsFailingStatsWithLogging() {
  SLOG(device_.get(), 2) << __func__ << ": DNS queries restored";
  ResetDnsFailingStats();
}
    168 
    169 bool TrafficMonitor::IsDnsFailing() {
    170   SLOG(device_.get(), 4) << __func__;
    171   vector<ConnectionInfo> connection_infos;
    172   if (!connection_info_reader_->LoadConnectionInfo(&connection_infos) ||
    173       connection_infos.empty()) {
    174     SLOG(device_.get(), 3) << __func__ << ": Empty connection info";
    175   } else {
    176     // The time-to-expire counter is used to determine when a DNS request
    177     // has timed out.  This counter is the number of seconds remaining until
    178     // the entry is removed from the system IP connection tracker.  The
    179     // default time is 30 seconds.  This is too long of a wait.  Instead, we
    180     // want to time out at |kDnsTimedOutThresholdSeconds|.  Unfortunately,
    181     // we cannot simply look for entries less than
    182     // |kDnsTimedOutThresholdSeconds| because we will count the entry
    183     // multiple times once its time-to-expire is less than
    184     // |kDnsTimedOutThresholdSeconds|.  To ensure that we only count an
    185     // entry once, we look for entries in this time window between
    186     // |kDnsTimedOutThresholdSeconds| and |kDnsTimedOutLowerThresholdSeconds|.
    187     const int64_t kDnsTimedOutLowerThresholdSeconds =
    188         kDnsTimedOutThresholdSeconds - kSamplingIntervalMilliseconds / 1000;
    189     string device_ip_address = device_->ipconfig()->properties().address;
    190     for (const auto& info : connection_infos) {
    191       if (info.protocol() != IPPROTO_UDP ||
    192           info.time_to_expire_seconds() > kDnsTimedOutThresholdSeconds ||
    193           info.time_to_expire_seconds() <= kDnsTimedOutLowerThresholdSeconds ||
    194           !info.is_unreplied() ||
    195           info.original_source_ip_address().ToString() != device_ip_address ||
    196           info.original_destination_port() != kDnsPort)
    197         continue;
    198 
    199       ++accummulated_dns_failures_samples_;
    200       SLOG(device_.get(), 2) << __func__
    201                              << ": DNS failures detected ("
    202                              << accummulated_dns_failures_samples_ << ")";
    203       return true;
    204     }
    205   }
    206   ResetDnsFailingStatsWithLogging();
    207   return false;
    208 }
    209 
    210 void TrafficMonitor::SampleTraffic() {
    211   SLOG(device_.get(), 3) << __func__;
    212 
    213   // Schedule the sample callback first, so it is possible for the network
    214   // problem callback to stop the traffic monitor.
    215   dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(),
    216                                kSamplingIntervalMilliseconds);
    217 
    218   if (IsCongestedTxQueues() &&
    219       accummulated_congested_tx_queues_samples_ ==
    220           kMinimumFailedSamplesToTrigger) {
    221     LOG(WARNING) << "Congested tx queues detected, out-of-credits?";
    222     network_problem_detected_callback_.Run(kNetworkProblemCongestedTxQueue);
    223   } else if (IsDnsFailing() &&
    224              accummulated_dns_failures_samples_ ==
    225                  kMinimumFailedSamplesToTrigger) {
    226     LOG(WARNING) << "DNS queries failing, out-of-credits?";
    227     network_problem_detected_callback_.Run(kNetworkProblemDNSFailure);
    228   }
    229 }
    230 
    231 }  // namespace shill
    232