1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.net.ip; 18 19 import com.android.internal.annotations.GuardedBy; 20 21 import android.content.Context; 22 import android.net.LinkAddress; 23 import android.net.LinkProperties; 24 import android.net.LinkProperties.ProvisioningChange; 25 import android.net.ProxyInfo; 26 import android.net.RouteInfo; 27 import android.net.metrics.IpConnectivityLog; 28 import android.net.metrics.IpReachabilityEvent; 29 import android.net.netlink.NetlinkConstants; 30 import android.net.netlink.NetlinkErrorMessage; 31 import android.net.netlink.NetlinkMessage; 32 import android.net.netlink.NetlinkSocket; 33 import android.net.netlink.RtNetlinkNeighborMessage; 34 import android.net.netlink.StructNdaCacheInfo; 35 import android.net.netlink.StructNdMsg; 36 import android.net.netlink.StructNlMsgHdr; 37 import android.os.PowerManager; 38 import android.os.SystemClock; 39 import android.system.ErrnoException; 40 import android.system.NetlinkSocketAddress; 41 import android.system.OsConstants; 42 import android.util.Log; 43 44 import java.io.InterruptedIOException; 45 import java.net.InetAddress; 46 import java.net.InetSocketAddress; 47 import java.net.NetworkInterface; 48 import java.net.SocketAddress; 49 import java.net.SocketException; 50 import java.nio.ByteBuffer; 51 import java.util.Arrays; 52 import java.util.HashMap; 53 import java.util.HashSet; 54 import java.util.List; 55 import java.util.Map; 56 import java.util.Set; 57 58 59 /** 60 * IpReachabilityMonitor. 61 * 62 * Monitors on-link IP reachability and notifies callers whenever any on-link 63 * addresses of interest appear to have become unresponsive. 64 * 65 * This code does not concern itself with "why" a neighbour might have become 66 * unreachable. Instead, it primarily reacts to the kernel's notion of IP 67 * reachability for each of the neighbours we know to be critically important 68 * to normal network connectivity. As such, it is often "just the messenger": 69 * the neighbours about which it warns are already deemed by the kernel to have 70 * become unreachable. 71 * 72 * 73 * How it works: 74 * 75 * 1. The "on-link neighbours of interest" found in a given LinkProperties 76 * instance are added to a "watch list" via #updateLinkProperties(). 77 * This usually means all default gateways and any on-link DNS servers. 78 * 79 * 2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH, 80 * RTM_DELNEIGH), watching only for neighbours in the watch list. 81 * 82 * - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and 83 * even NUD_PROBE is perfectly normal; we merely record the new state. 84 * 85 * - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due 86 * to garbage collection. This is not necessarily of immediate 87 * concern; we record the neighbour as moving to NUD_NONE. 88 * 89 * - A neighbour transitioning to NUD_FAILED (for any reason) is 90 * critically important and is handled as described below in #4. 91 * 92 * 3. All on-link neighbours in the watch list can be forcibly "probed" by 93 * calling #probeAll(). This should be called whenever it is important to 94 * verify that critical neighbours on the link are still reachable, e.g. 95 * when roaming between BSSIDs. 96 * 97 * - The kernel will send unicast ARP requests for IPv4 neighbours and 98 * unicast NS packets for IPv6 neighbours. The expected replies will 99 * likely be unicast. 100 * 101 * - The forced probing is done holding a wakelock. The kernel may, 102 * however, initiate probing of a neighbor on its own, i.e. whenever 103 * a neighbour has expired from NUD_DELAY. 104 * 105 * - The kernel sends: 106 * 107 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit 108 * 109 * number of probes (usually 3) every: 110 * 111 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms 112 * 113 * number of milliseconds (usually 1000ms). This normally results in 114 * 3 unicast packets, 1 per second. 115 * 116 * - If no response is received to any of the probe packets, the kernel 117 * marks the neighbour as being in state NUD_FAILED, and the listening 118 * process in #2 will learn of it. 119 * 120 * 4. We call the supplied Callback#notifyLost() function if the loss of a 121 * neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to 122 * become incomplete (a loss of provisioning). 123 * 124 * - For example, losing all our IPv4 on-link DNS servers (or losing 125 * our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6) 126 * provisioning; Callback#notifyLost() would be called. 127 * 128 * - Since it can be non-trivial to reacquire certain IP provisioning 129 * state it may be best for the link to disconnect completely and 130 * reconnect afresh. 131 * 132 * @hide 133 */ 134 public class IpReachabilityMonitor { 135 private static final String TAG = "IpReachabilityMonitor"; 136 private static final boolean DBG = false; 137 private static final boolean VDBG = false; 138 139 public interface Callback { 140 // This callback function must execute as quickly as possible as it is 141 // run on the same thread that listens to kernel neighbor updates. 142 // 143 // TODO: refactor to something like notifyProvisioningLost(String msg). 144 public void notifyLost(InetAddress ip, String logMsg); 145 } 146 147 private final Object mLock = new Object(); 148 private final PowerManager.WakeLock mWakeLock; 149 private final String mInterfaceName; 150 private final int mInterfaceIndex; 151 private final Callback mCallback; 152 private final NetlinkSocketObserver mNetlinkSocketObserver; 153 private final Thread mObserverThread; 154 private final IpConnectivityLog mMetricsLog = new IpConnectivityLog(); 155 @GuardedBy("mLock") 156 private LinkProperties mLinkProperties = new LinkProperties(); 157 // TODO: consider a map to a private NeighborState class holding more 158 // information than a single NUD state entry. 159 @GuardedBy("mLock") 160 private Map<InetAddress, Short> mIpWatchList = new HashMap<>(); 161 @GuardedBy("mLock") 162 private int mIpWatchListVersion; 163 @GuardedBy("mLock") 164 private boolean mRunning; 165 // Time in milliseconds of the last forced probe request. 166 private volatile long mLastProbeTimeMs; 167 168 /** 169 * Make the kernel perform neighbor reachability detection (IPv4 ARP or IPv6 ND) 170 * for the given IP address on the specified interface index. 171 * 172 * @return 0 if the request was successfully passed to the kernel; otherwise return 173 * a non-zero error code. 174 */ 175 private static int probeNeighbor(int ifIndex, InetAddress ip) { 176 final String msgSnippet = "probing ip=" + ip.getHostAddress() + "%" + ifIndex; 177 if (DBG) { Log.d(TAG, msgSnippet); } 178 179 final byte[] msg = RtNetlinkNeighborMessage.newNewNeighborMessage( 180 1, ip, StructNdMsg.NUD_PROBE, ifIndex, null); 181 182 int errno = -OsConstants.EPROTO; 183 try (NetlinkSocket nlSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE)) { 184 final long IO_TIMEOUT = 300L; 185 nlSocket.connectToKernel(); 186 nlSocket.sendMessage(msg, 0, msg.length, IO_TIMEOUT); 187 final ByteBuffer bytes = nlSocket.recvMessage(IO_TIMEOUT); 188 // recvMessage() guaranteed to not return null if it did not throw. 189 final NetlinkMessage response = NetlinkMessage.parse(bytes); 190 if (response != null && response instanceof NetlinkErrorMessage && 191 (((NetlinkErrorMessage) response).getNlMsgError() != null)) { 192 errno = ((NetlinkErrorMessage) response).getNlMsgError().error; 193 if (errno != 0) { 194 // TODO: consider ignoring EINVAL (-22), which appears to be 195 // normal when probing a neighbor for which the kernel does 196 // not already have / no longer has a link layer address. 197 Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + response.toString()); 198 } 199 } else { 200 String errmsg; 201 if (response == null) { 202 bytes.position(0); 203 errmsg = "raw bytes: " + NetlinkConstants.hexify(bytes); 204 } else { 205 errmsg = response.toString(); 206 } 207 Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + errmsg); 208 } 209 } catch (ErrnoException e) { 210 Log.e(TAG, "Error " + msgSnippet, e); 211 errno = -e.errno; 212 } catch (InterruptedIOException e) { 213 Log.e(TAG, "Error " + msgSnippet, e); 214 errno = -OsConstants.ETIMEDOUT; 215 } catch (SocketException e) { 216 Log.e(TAG, "Error " + msgSnippet, e); 217 errno = -OsConstants.EIO; 218 } 219 return errno; 220 } 221 222 public IpReachabilityMonitor(Context context, String ifName, Callback callback) 223 throws IllegalArgumentException { 224 mInterfaceName = ifName; 225 int ifIndex = -1; 226 try { 227 NetworkInterface netIf = NetworkInterface.getByName(ifName); 228 mInterfaceIndex = netIf.getIndex(); 229 } catch (SocketException | NullPointerException e) { 230 throw new IllegalArgumentException("invalid interface '" + ifName + "': ", e); 231 } 232 mWakeLock = ((PowerManager) context.getSystemService(Context.POWER_SERVICE)).newWakeLock( 233 PowerManager.PARTIAL_WAKE_LOCK, TAG + "." + mInterfaceName); 234 mCallback = callback; 235 mNetlinkSocketObserver = new NetlinkSocketObserver(); 236 mObserverThread = new Thread(mNetlinkSocketObserver); 237 mObserverThread.start(); 238 } 239 240 public void stop() { 241 synchronized (mLock) { mRunning = false; } 242 clearLinkProperties(); 243 mNetlinkSocketObserver.clearNetlinkSocket(); 244 } 245 246 // TODO: add a public dump() method that can be called during a bug report. 247 248 private String describeWatchList() { 249 final String delimiter = ", "; 250 StringBuilder sb = new StringBuilder(); 251 synchronized (mLock) { 252 sb.append("iface{" + mInterfaceName + "/" + mInterfaceIndex + "}, "); 253 sb.append("v{" + mIpWatchListVersion + "}, "); 254 sb.append("ntable=["); 255 boolean firstTime = true; 256 for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) { 257 if (firstTime) { 258 firstTime = false; 259 } else { 260 sb.append(delimiter); 261 } 262 sb.append(entry.getKey().getHostAddress() + "/" + 263 StructNdMsg.stringForNudState(entry.getValue())); 264 } 265 sb.append("]"); 266 } 267 return sb.toString(); 268 } 269 270 private boolean isWatching(InetAddress ip) { 271 synchronized (mLock) { 272 return mRunning && mIpWatchList.containsKey(ip); 273 } 274 } 275 276 private boolean stillRunning() { 277 synchronized (mLock) { 278 return mRunning; 279 } 280 } 281 282 private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) { 283 for (RouteInfo route : routes) { 284 if (!route.hasGateway() && route.matches(ip)) { 285 return true; 286 } 287 } 288 return false; 289 } 290 291 private short getNeighborStateLocked(InetAddress ip) { 292 if (mIpWatchList.containsKey(ip)) { 293 return mIpWatchList.get(ip); 294 } 295 return StructNdMsg.NUD_NONE; 296 } 297 298 public void updateLinkProperties(LinkProperties lp) { 299 if (!mInterfaceName.equals(lp.getInterfaceName())) { 300 // TODO: figure out whether / how to cope with interface changes. 301 Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() + 302 "' does not match: " + mInterfaceName); 303 return; 304 } 305 306 synchronized (mLock) { 307 mLinkProperties = new LinkProperties(lp); 308 Map<InetAddress, Short> newIpWatchList = new HashMap<>(); 309 310 final List<RouteInfo> routes = mLinkProperties.getRoutes(); 311 for (RouteInfo route : routes) { 312 if (route.hasGateway()) { 313 InetAddress gw = route.getGateway(); 314 if (isOnLink(routes, gw)) { 315 newIpWatchList.put(gw, getNeighborStateLocked(gw)); 316 } 317 } 318 } 319 320 for (InetAddress nameserver : lp.getDnsServers()) { 321 if (isOnLink(routes, nameserver)) { 322 newIpWatchList.put(nameserver, getNeighborStateLocked(nameserver)); 323 } 324 } 325 326 mIpWatchList = newIpWatchList; 327 mIpWatchListVersion++; 328 } 329 if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); } 330 } 331 332 public void clearLinkProperties() { 333 synchronized (mLock) { 334 mLinkProperties.clear(); 335 mIpWatchList.clear(); 336 mIpWatchListVersion++; 337 } 338 if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); } 339 } 340 341 private void handleNeighborLost(String msg) { 342 InetAddress ip = null; 343 final ProvisioningChange delta; 344 synchronized (mLock) { 345 LinkProperties whatIfLp = new LinkProperties(mLinkProperties); 346 347 for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) { 348 if (entry.getValue() != StructNdMsg.NUD_FAILED) { 349 continue; 350 } 351 352 ip = entry.getKey(); 353 for (RouteInfo route : mLinkProperties.getRoutes()) { 354 if (ip.equals(route.getGateway())) { 355 whatIfLp.removeRoute(route); 356 } 357 } 358 whatIfLp.removeDnsServer(ip); 359 } 360 361 delta = LinkProperties.compareProvisioning(mLinkProperties, whatIfLp); 362 } 363 364 if (delta == ProvisioningChange.LOST_PROVISIONING) { 365 final String logMsg = "FAILURE: LOST_PROVISIONING, " + msg; 366 Log.w(TAG, logMsg); 367 if (mCallback != null) { 368 // TODO: remove |ip| when the callback signature no longer has 369 // an InetAddress argument. 370 mCallback.notifyLost(ip, logMsg); 371 } 372 } 373 logNudFailed(delta); 374 } 375 376 public void probeAll() { 377 Set<InetAddress> ipProbeList = new HashSet<InetAddress>(); 378 synchronized (mLock) { 379 ipProbeList.addAll(mIpWatchList.keySet()); 380 } 381 382 if (!ipProbeList.isEmpty() && stillRunning()) { 383 // Keep the CPU awake long enough to allow all ARP/ND 384 // probes a reasonable chance at success. See b/23197666. 385 // 386 // The wakelock we use is (by default) refcounted, and this version 387 // of acquire(timeout) queues a release message to keep acquisitions 388 // and releases balanced. 389 mWakeLock.acquire(getProbeWakeLockDuration()); 390 } 391 392 for (InetAddress target : ipProbeList) { 393 if (!stillRunning()) { 394 break; 395 } 396 final int returnValue = probeNeighbor(mInterfaceIndex, target); 397 logEvent(IpReachabilityEvent.PROBE, returnValue); 398 } 399 mLastProbeTimeMs = SystemClock.elapsedRealtime(); 400 } 401 402 private static long getProbeWakeLockDuration() { 403 // Ideally, this would be computed by examining the values of: 404 // 405 // /proc/sys/net/ipv[46]/neigh/<ifname>/ucast_solicit 406 // 407 // and: 408 // 409 // /proc/sys/net/ipv[46]/neigh/<ifname>/retrans_time_ms 410 // 411 // For now, just make some assumptions. 412 final long numUnicastProbes = 3; 413 final long retransTimeMs = 1000; 414 final long gracePeriodMs = 500; 415 return (numUnicastProbes * retransTimeMs) + gracePeriodMs; 416 } 417 418 private void logEvent(int probeType, int errorCode) { 419 int eventType = probeType | (errorCode & 0xff); 420 mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType)); 421 } 422 423 private void logNudFailed(ProvisioningChange delta) { 424 long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs; 425 boolean isFromProbe = (duration < getProbeWakeLockDuration()); 426 boolean isProvisioningLost = (delta == ProvisioningChange.LOST_PROVISIONING); 427 int eventType = IpReachabilityEvent.nudFailureEventType(isFromProbe, isProvisioningLost); 428 mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType)); 429 } 430 431 // TODO: simplify the number of objects by making this extend Thread. 432 private final class NetlinkSocketObserver implements Runnable { 433 private NetlinkSocket mSocket; 434 435 @Override 436 public void run() { 437 if (VDBG) { Log.d(TAG, "Starting observing thread."); } 438 synchronized (mLock) { mRunning = true; } 439 440 try { 441 setupNetlinkSocket(); 442 } catch (ErrnoException | SocketException e) { 443 Log.e(TAG, "Failed to suitably initialize a netlink socket", e); 444 synchronized (mLock) { mRunning = false; } 445 } 446 447 ByteBuffer byteBuffer; 448 while (stillRunning()) { 449 try { 450 byteBuffer = recvKernelReply(); 451 } catch (ErrnoException e) { 452 if (stillRunning()) { Log.w(TAG, "ErrnoException: ", e); } 453 break; 454 } 455 final long whenMs = SystemClock.elapsedRealtime(); 456 if (byteBuffer == null) { 457 continue; 458 } 459 parseNetlinkMessageBuffer(byteBuffer, whenMs); 460 } 461 462 clearNetlinkSocket(); 463 464 synchronized (mLock) { mRunning = false; } 465 if (VDBG) { Log.d(TAG, "Finishing observing thread."); } 466 } 467 468 private void clearNetlinkSocket() { 469 if (mSocket != null) { 470 mSocket.close(); 471 } 472 } 473 474 // TODO: Refactor the main loop to recreate the socket upon recoverable errors. 475 private void setupNetlinkSocket() throws ErrnoException, SocketException { 476 clearNetlinkSocket(); 477 mSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE); 478 479 final NetlinkSocketAddress listenAddr = new NetlinkSocketAddress( 480 0, OsConstants.RTMGRP_NEIGH); 481 mSocket.bind(listenAddr); 482 483 if (VDBG) { 484 final NetlinkSocketAddress nlAddr = mSocket.getLocalAddress(); 485 Log.d(TAG, "bound to sockaddr_nl{" 486 + ((long) (nlAddr.getPortId() & 0xffffffff)) + ", " 487 + nlAddr.getGroupsMask() 488 + "}"); 489 } 490 } 491 492 private ByteBuffer recvKernelReply() throws ErrnoException { 493 try { 494 return mSocket.recvMessage(0); 495 } catch (InterruptedIOException e) { 496 // Interruption or other error, e.g. another thread closed our file descriptor. 497 } catch (ErrnoException e) { 498 if (e.errno != OsConstants.EAGAIN) { 499 throw e; 500 } 501 } 502 return null; 503 } 504 505 private void parseNetlinkMessageBuffer(ByteBuffer byteBuffer, long whenMs) { 506 while (byteBuffer.remaining() > 0) { 507 final int position = byteBuffer.position(); 508 final NetlinkMessage nlMsg = NetlinkMessage.parse(byteBuffer); 509 if (nlMsg == null || nlMsg.getHeader() == null) { 510 byteBuffer.position(position); 511 Log.e(TAG, "unparsable netlink msg: " + NetlinkConstants.hexify(byteBuffer)); 512 break; 513 } 514 515 final int srcPortId = nlMsg.getHeader().nlmsg_pid; 516 if (srcPortId != 0) { 517 Log.e(TAG, "non-kernel source portId: " + ((long) (srcPortId & 0xffffffff))); 518 break; 519 } 520 521 if (nlMsg instanceof NetlinkErrorMessage) { 522 Log.e(TAG, "netlink error: " + nlMsg); 523 continue; 524 } else if (!(nlMsg instanceof RtNetlinkNeighborMessage)) { 525 if (DBG) { 526 Log.d(TAG, "non-rtnetlink neighbor msg: " + nlMsg); 527 } 528 continue; 529 } 530 531 evaluateRtNetlinkNeighborMessage((RtNetlinkNeighborMessage) nlMsg, whenMs); 532 } 533 } 534 535 private void evaluateRtNetlinkNeighborMessage( 536 RtNetlinkNeighborMessage neighMsg, long whenMs) { 537 final StructNdMsg ndMsg = neighMsg.getNdHeader(); 538 if (ndMsg == null || ndMsg.ndm_ifindex != mInterfaceIndex) { 539 return; 540 } 541 542 final InetAddress destination = neighMsg.getDestination(); 543 if (!isWatching(destination)) { 544 return; 545 } 546 547 final short msgType = neighMsg.getHeader().nlmsg_type; 548 final short nudState = ndMsg.ndm_state; 549 final String eventMsg = "NeighborEvent{" 550 + "elapsedMs=" + whenMs + ", " 551 + destination.getHostAddress() + ", " 552 + "[" + NetlinkConstants.hexify(neighMsg.getLinkLayerAddress()) + "], " 553 + NetlinkConstants.stringForNlMsgType(msgType) + ", " 554 + StructNdMsg.stringForNudState(nudState) 555 + "}"; 556 557 if (VDBG) { 558 Log.d(TAG, neighMsg.toString()); 559 } else if (DBG) { 560 Log.d(TAG, eventMsg); 561 } 562 563 synchronized (mLock) { 564 if (mIpWatchList.containsKey(destination)) { 565 final short value = 566 (msgType == NetlinkConstants.RTM_DELNEIGH) 567 ? StructNdMsg.NUD_NONE 568 : nudState; 569 mIpWatchList.put(destination, value); 570 } 571 } 572 573 if (nudState == StructNdMsg.NUD_FAILED) { 574 Log.w(TAG, "ALERT: " + eventMsg); 575 handleNeighborLost(eventMsg); 576 } 577 } 578 } 579 } 580