1 // 2 // Copyright (C) 2013 The Android Open Source Project 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 #include "shill/traffic_monitor.h" 18 19 #include <base/bind.h> 20 #include <base/strings/stringprintf.h> 21 #include <netinet/in.h> 22 23 #include "shill/device.h" 24 #include "shill/device_info.h" 25 #include "shill/event_dispatcher.h" 26 #include "shill/logging.h" 27 #include "shill/socket_info_reader.h" 28 29 using base::StringPrintf; 30 using std::string; 31 using std::vector; 32 33 namespace shill { 34 35 namespace Logging { 36 static auto kModuleLogScope = ScopeLogger::kLink; 37 static string ObjectID(Device* d) { return d->link_name(); } 38 } 39 40 // static 41 const uint16_t TrafficMonitor::kDnsPort = 53; 42 const int64_t TrafficMonitor::kDnsTimedOutThresholdSeconds = 15; 43 const int TrafficMonitor::kMinimumFailedSamplesToTrigger = 2; 44 const int64_t TrafficMonitor::kSamplingIntervalMilliseconds = 5000; 45 46 TrafficMonitor::TrafficMonitor(const DeviceRefPtr& device, 47 EventDispatcher* dispatcher) 48 : device_(device), 49 dispatcher_(dispatcher), 50 socket_info_reader_(new SocketInfoReader), 51 accummulated_congested_tx_queues_samples_(0), 52 connection_info_reader_(new ConnectionInfoReader), 53 accummulated_dns_failures_samples_(0) { 54 } 55 56 TrafficMonitor::~TrafficMonitor() { 57 Stop(); 58 } 59 60 void TrafficMonitor::Start() { 61 SLOG(device_.get(), 2) << __func__; 62 Stop(); 63 64 sample_traffic_callback_.Reset(base::Bind(&TrafficMonitor::SampleTraffic, 65 base::Unretained(this))); 66 dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(), 67 kSamplingIntervalMilliseconds); 68 } 69 70 void TrafficMonitor::Stop() { 71 SLOG(device_.get(), 2) << __func__; 72 sample_traffic_callback_.Cancel(); 73 ResetCongestedTxQueuesStats(); 74 ResetDnsFailingStats(); 75 } 76 77 void TrafficMonitor::ResetCongestedTxQueuesStats() { 78 accummulated_congested_tx_queues_samples_ = 0; 79 } 80 81 void TrafficMonitor::ResetCongestedTxQueuesStatsWithLogging() { 82 SLOG(device_.get(), 2) << __func__ << ": Tx-queues decongested"; 83 ResetCongestedTxQueuesStats(); 84 } 85 86 void TrafficMonitor::BuildIPPortToTxQueueLength( 87 const vector<SocketInfo>& socket_infos, 88 IPPortToTxQueueLengthMap* tx_queue_lengths) { 89 SLOG(device_.get(), 3) << __func__; 90 string device_ip_address = device_->ipconfig()->properties().address; 91 for (const auto& info : socket_infos) { 92 SLOG(device_.get(), 4) << "SocketInfo(IP=" 93 << info.local_ip_address().ToString() 94 << ", TX=" << info.transmit_queue_value() 95 << ", State=" << info.connection_state() 96 << ", TimerState=" << info.timer_state(); 97 if (info.local_ip_address().ToString() != device_ip_address || 98 info.transmit_queue_value() == 0 || 99 info.connection_state() != SocketInfo::kConnectionStateEstablished || 100 (info.timer_state() != SocketInfo::kTimerStateRetransmitTimerPending && 101 info.timer_state() != 102 SocketInfo::kTimerStateZeroWindowProbeTimerPending)) { 103 SLOG(device_.get(), 4) << "Connection Filtered."; 104 continue; 105 } 106 SLOG(device_.get(), 3) << "Monitoring connection: TX=" 107 << info.transmit_queue_value() 108 << " TimerState=" << info.timer_state(); 109 110 string local_ip_port = 111 StringPrintf("%s:%d", 112 info.local_ip_address().ToString().c_str(), 113 info.local_port()); 114 (*tx_queue_lengths)[local_ip_port] = info.transmit_queue_value(); 115 } 116 } 117 118 bool TrafficMonitor::IsCongestedTxQueues() { 119 SLOG(device_.get(), 4) << __func__; 120 vector<SocketInfo> socket_infos; 121 if (!socket_info_reader_->LoadTcpSocketInfo(&socket_infos) || 122 socket_infos.empty()) { 123 SLOG(device_.get(), 3) << __func__ << ": Empty socket info"; 124 ResetCongestedTxQueuesStatsWithLogging(); 125 return false; 126 } 127 bool congested_tx_queues = true; 128 IPPortToTxQueueLengthMap curr_tx_queue_lengths; 129 BuildIPPortToTxQueueLength(socket_infos, &curr_tx_queue_lengths); 130 if (curr_tx_queue_lengths.empty()) { 131 SLOG(device_.get(), 3) << __func__ << ": No interesting socket info"; 132 ResetCongestedTxQueuesStatsWithLogging(); 133 } else { 134 for (const auto& length_entry : old_tx_queue_lengths_) { 135 IPPortToTxQueueLengthMap::iterator curr_tx_queue_it = 136 curr_tx_queue_lengths.find(length_entry.first); 137 if (curr_tx_queue_it == curr_tx_queue_lengths.end() || 138 curr_tx_queue_it->second < length_entry.second) { 139 congested_tx_queues = false; 140 // TODO(armansito): If we had a false positive earlier, we may 141 // want to correct it here by invoking a "connection back to normal 142 // callback", so that the OutOfCredits property can be set to 143 // false. 144 break; 145 } 146 } 147 if (congested_tx_queues) { 148 ++accummulated_congested_tx_queues_samples_; 149 SLOG(device_.get(), 2) << __func__ 150 << ": Congested tx-queues detected (" 151 << accummulated_congested_tx_queues_samples_ 152 << ")"; 153 } 154 } 155 old_tx_queue_lengths_ = curr_tx_queue_lengths; 156 157 return congested_tx_queues; 158 } 159 160 void TrafficMonitor::ResetDnsFailingStats() { 161 accummulated_dns_failures_samples_ = 0; 162 } 163 164 void TrafficMonitor::ResetDnsFailingStatsWithLogging() { 165 SLOG(device_.get(), 2) << __func__ << ": DNS queries restored"; 166 ResetDnsFailingStats(); 167 } 168 169 bool TrafficMonitor::IsDnsFailing() { 170 SLOG(device_.get(), 4) << __func__; 171 vector<ConnectionInfo> connection_infos; 172 if (!connection_info_reader_->LoadConnectionInfo(&connection_infos) || 173 connection_infos.empty()) { 174 SLOG(device_.get(), 3) << __func__ << ": Empty connection info"; 175 } else { 176 // The time-to-expire counter is used to determine when a DNS request 177 // has timed out. This counter is the number of seconds remaining until 178 // the entry is removed from the system IP connection tracker. The 179 // default time is 30 seconds. This is too long of a wait. Instead, we 180 // want to time out at |kDnsTimedOutThresholdSeconds|. Unfortunately, 181 // we cannot simply look for entries less than 182 // |kDnsTimedOutThresholdSeconds| because we will count the entry 183 // multiple times once its time-to-expire is less than 184 // |kDnsTimedOutThresholdSeconds|. To ensure that we only count an 185 // entry once, we look for entries in this time window between 186 // |kDnsTimedOutThresholdSeconds| and |kDnsTimedOutLowerThresholdSeconds|. 187 const int64_t kDnsTimedOutLowerThresholdSeconds = 188 kDnsTimedOutThresholdSeconds - kSamplingIntervalMilliseconds / 1000; 189 string device_ip_address = device_->ipconfig()->properties().address; 190 for (const auto& info : connection_infos) { 191 if (info.protocol() != IPPROTO_UDP || 192 info.time_to_expire_seconds() > kDnsTimedOutThresholdSeconds || 193 info.time_to_expire_seconds() <= kDnsTimedOutLowerThresholdSeconds || 194 !info.is_unreplied() || 195 info.original_source_ip_address().ToString() != device_ip_address || 196 info.original_destination_port() != kDnsPort) 197 continue; 198 199 ++accummulated_dns_failures_samples_; 200 SLOG(device_.get(), 2) << __func__ 201 << ": DNS failures detected (" 202 << accummulated_dns_failures_samples_ << ")"; 203 return true; 204 } 205 } 206 ResetDnsFailingStatsWithLogging(); 207 return false; 208 } 209 210 void TrafficMonitor::SampleTraffic() { 211 SLOG(device_.get(), 3) << __func__; 212 213 // Schedule the sample callback first, so it is possible for the network 214 // problem callback to stop the traffic monitor. 215 dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(), 216 kSamplingIntervalMilliseconds); 217 218 if (IsCongestedTxQueues() && 219 accummulated_congested_tx_queues_samples_ == 220 kMinimumFailedSamplesToTrigger) { 221 LOG(WARNING) << "Congested tx queues detected, out-of-credits?"; 222 network_problem_detected_callback_.Run(kNetworkProblemCongestedTxQueue); 223 } else if (IsDnsFailing() && 224 accummulated_dns_failures_samples_ == 225 kMinimumFailedSamplesToTrigger) { 226 LOG(WARNING) << "DNS queries failing, out-of-credits?"; 227 network_problem_detected_callback_.Run(kNetworkProblemDNSFailure); 228 } 229 } 230 231 } // namespace shill 232