Home | History | Annotate | Download | only in ip
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.net.ip;
     18 
     19 import com.android.internal.annotations.GuardedBy;
     20 
     21 import android.content.Context;
     22 import android.net.LinkAddress;
     23 import android.net.LinkProperties;
     24 import android.net.LinkProperties.ProvisioningChange;
     25 import android.net.ProxyInfo;
     26 import android.net.RouteInfo;
     27 import android.net.metrics.IpConnectivityLog;
     28 import android.net.metrics.IpReachabilityEvent;
     29 import android.net.netlink.NetlinkConstants;
     30 import android.net.netlink.NetlinkErrorMessage;
     31 import android.net.netlink.NetlinkMessage;
     32 import android.net.netlink.NetlinkSocket;
     33 import android.net.netlink.RtNetlinkNeighborMessage;
     34 import android.net.netlink.StructNdaCacheInfo;
     35 import android.net.netlink.StructNdMsg;
     36 import android.net.netlink.StructNlMsgHdr;
     37 import android.os.PowerManager;
     38 import android.os.SystemClock;
     39 import android.system.ErrnoException;
     40 import android.system.NetlinkSocketAddress;
     41 import android.system.OsConstants;
     42 import android.util.Log;
     43 
     44 import java.io.InterruptedIOException;
     45 import java.net.InetAddress;
     46 import java.net.InetSocketAddress;
     47 import java.net.NetworkInterface;
     48 import java.net.SocketAddress;
     49 import java.net.SocketException;
     50 import java.nio.ByteBuffer;
     51 import java.util.Arrays;
     52 import java.util.HashMap;
     53 import java.util.HashSet;
     54 import java.util.List;
     55 import java.util.Map;
     56 import java.util.Set;
     57 
     58 
     59 /**
     60  * IpReachabilityMonitor.
     61  *
     62  * Monitors on-link IP reachability and notifies callers whenever any on-link
     63  * addresses of interest appear to have become unresponsive.
     64  *
     65  * This code does not concern itself with "why" a neighbour might have become
     66  * unreachable. Instead, it primarily reacts to the kernel's notion of IP
     67  * reachability for each of the neighbours we know to be critically important
     68  * to normal network connectivity. As such, it is often "just the messenger":
     69  * the neighbours about which it warns are already deemed by the kernel to have
     70  * become unreachable.
     71  *
     72  *
     73  * How it works:
     74  *
     75  *   1. The "on-link neighbours of interest" found in a given LinkProperties
     76  *      instance are added to a "watch list" via #updateLinkProperties().
     77  *      This usually means all default gateways and any on-link DNS servers.
     78  *
     79  *   2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH,
     80  *      RTM_DELNEIGH), watching only for neighbours in the watch list.
     81  *
     82  *        - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and
     83  *          even NUD_PROBE is perfectly normal; we merely record the new state.
     84  *
     85  *        - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due
     86  *          to garbage collection.  This is not necessarily of immediate
     87  *          concern; we record the neighbour as moving to NUD_NONE.
     88  *
     89  *        - A neighbour transitioning to NUD_FAILED (for any reason) is
     90  *          critically important and is handled as described below in #4.
     91  *
     92  *   3. All on-link neighbours in the watch list can be forcibly "probed" by
     93  *      calling #probeAll(). This should be called whenever it is important to
     94  *      verify that critical neighbours on the link are still reachable, e.g.
     95  *      when roaming between BSSIDs.
     96  *
     97  *        - The kernel will send unicast ARP requests for IPv4 neighbours and
     98  *          unicast NS packets for IPv6 neighbours.  The expected replies will
     99  *          likely be unicast.
    100  *
    101  *        - The forced probing is done holding a wakelock. The kernel may,
    102  *          however, initiate probing of a neighbor on its own, i.e. whenever
    103  *          a neighbour has expired from NUD_DELAY.
    104  *
    105  *        - The kernel sends:
    106  *
    107  *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit
    108  *
    109  *          number of probes (usually 3) every:
    110  *
    111  *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms
    112  *
    113  *          number of milliseconds (usually 1000ms). This normally results in
    114  *          3 unicast packets, 1 per second.
    115  *
    116  *        - If no response is received to any of the probe packets, the kernel
    117  *          marks the neighbour as being in state NUD_FAILED, and the listening
    118  *          process in #2 will learn of it.
    119  *
    120  *   4. We call the supplied Callback#notifyLost() function if the loss of a
    121  *      neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to
    122  *      become incomplete (a loss of provisioning).
    123  *
    124  *        - For example, losing all our IPv4 on-link DNS servers (or losing
    125  *          our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6)
    126  *          provisioning; Callback#notifyLost() would be called.
    127  *
    128  *        - Since it can be non-trivial to reacquire certain IP provisioning
    129  *          state it may be best for the link to disconnect completely and
    130  *          reconnect afresh.
    131  *
    132  * @hide
    133  */
    134 public class IpReachabilityMonitor {
    135     private static final String TAG = "IpReachabilityMonitor";
    136     private static final boolean DBG = false;
    137     private static final boolean VDBG = false;
    138 
    /**
     * Receiver for loss-of-provisioning notifications.
     */
    public interface Callback {
        /**
         * Invoked when the failure of one or more watched neighbours results
         * in a loss of provisioning.
         *
         * Implementations must return as quickly as possible: the call is
         * made on the same thread that listens to kernel neighbor updates.
         *
         * TODO: refactor to something like notifyProvisioningLost(String msg).
         *
         * @param ip     a watched address currently recorded as NUD_FAILED.
         * @param logMsg human-readable description of the failure.
         */
        void notifyLost(InetAddress ip, String logMsg);
    }
    146 
    // Guards the mutable monitoring state below (see the @GuardedBy fields).
    private final Object mLock = new Object();
    // Held with a timeout while forced probes are outstanding; see probeAll().
    private final PowerManager.WakeLock mWakeLock;
    // Name and kernel index of the single interface this instance monitors.
    private final String mInterfaceName;
    private final int mInterfaceIndex;
    // Recipient of loss-of-provisioning notifications; null-checked before use.
    private final Callback mCallback;
    // The netlink listener and the thread that runs it; the thread is started
    // by the constructor.
    private final NetlinkSocketObserver mNetlinkSocketObserver;
    private final Thread mObserverThread;
    private final IpConnectivityLog mMetricsLog = new IpConnectivityLog();
    // Private copy of the most recent LinkProperties passed to
    // updateLinkProperties().
    @GuardedBy("mLock")
    private LinkProperties mLinkProperties = new LinkProperties();
    // Watched on-link neighbours, each mapped to its last recorded NUD state.
    //
    // TODO: consider a map to a private NeighborState class holding more
    // information than a single NUD state entry.
    @GuardedBy("mLock")
    private Map<InetAddress, Short> mIpWatchList = new HashMap<>();
    // Incremented every time the watch list is replaced or cleared.
    @GuardedBy("mLock")
    private int mIpWatchListVersion;
    // Set by the observer thread when it starts; cleared by stop() and when
    // the observer thread exits.
    @GuardedBy("mLock")
    private boolean mRunning;
    // Time in milliseconds of the last forced probe request
    // (SystemClock.elapsedRealtime() timebase).
    private volatile long mLastProbeTimeMs;
    167 
    168     /**
    169      * Make the kernel perform neighbor reachability detection (IPv4 ARP or IPv6 ND)
    170      * for the given IP address on the specified interface index.
    171      *
    172      * @return 0 if the request was successfully passed to the kernel; otherwise return
    173      *         a non-zero error code.
    174      */
    175     private static int probeNeighbor(int ifIndex, InetAddress ip) {
    176         final String msgSnippet = "probing ip=" + ip.getHostAddress() + "%" + ifIndex;
    177         if (DBG) { Log.d(TAG, msgSnippet); }
    178 
    179         final byte[] msg = RtNetlinkNeighborMessage.newNewNeighborMessage(
    180                 1, ip, StructNdMsg.NUD_PROBE, ifIndex, null);
    181 
    182         int errno = -OsConstants.EPROTO;
    183         try (NetlinkSocket nlSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE)) {
    184             final long IO_TIMEOUT = 300L;
    185             nlSocket.connectToKernel();
    186             nlSocket.sendMessage(msg, 0, msg.length, IO_TIMEOUT);
    187             final ByteBuffer bytes = nlSocket.recvMessage(IO_TIMEOUT);
    188             // recvMessage() guaranteed to not return null if it did not throw.
    189             final NetlinkMessage response = NetlinkMessage.parse(bytes);
    190             if (response != null && response instanceof NetlinkErrorMessage &&
    191                     (((NetlinkErrorMessage) response).getNlMsgError() != null)) {
    192                 errno = ((NetlinkErrorMessage) response).getNlMsgError().error;
    193                 if (errno != 0) {
    194                     // TODO: consider ignoring EINVAL (-22), which appears to be
    195                     // normal when probing a neighbor for which the kernel does
    196                     // not already have / no longer has a link layer address.
    197                     Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + response.toString());
    198                 }
    199             } else {
    200                 String errmsg;
    201                 if (response == null) {
    202                     bytes.position(0);
    203                     errmsg = "raw bytes: " + NetlinkConstants.hexify(bytes);
    204                 } else {
    205                     errmsg = response.toString();
    206                 }
    207                 Log.e(TAG, "Error " + msgSnippet + ", errmsg=" + errmsg);
    208             }
    209         } catch (ErrnoException e) {
    210             Log.e(TAG, "Error " + msgSnippet, e);
    211             errno = -e.errno;
    212         } catch (InterruptedIOException e) {
    213             Log.e(TAG, "Error " + msgSnippet, e);
    214             errno = -OsConstants.ETIMEDOUT;
    215         } catch (SocketException e) {
    216             Log.e(TAG, "Error " + msgSnippet, e);
    217             errno = -OsConstants.EIO;
    218         }
    219         return errno;
    220     }
    221 
    222     public IpReachabilityMonitor(Context context, String ifName, Callback callback)
    223                 throws IllegalArgumentException {
    224         mInterfaceName = ifName;
    225         int ifIndex = -1;
    226         try {
    227             NetworkInterface netIf = NetworkInterface.getByName(ifName);
    228             mInterfaceIndex = netIf.getIndex();
    229         } catch (SocketException | NullPointerException e) {
    230             throw new IllegalArgumentException("invalid interface '" + ifName + "': ", e);
    231         }
    232         mWakeLock = ((PowerManager) context.getSystemService(Context.POWER_SERVICE)).newWakeLock(
    233                 PowerManager.PARTIAL_WAKE_LOCK, TAG + "." + mInterfaceName);
    234         mCallback = callback;
    235         mNetlinkSocketObserver = new NetlinkSocketObserver();
    236         mObserverThread = new Thread(mNetlinkSocketObserver);
    237         mObserverThread.start();
    238     }
    239 
    240     public void stop() {
    241         synchronized (mLock) { mRunning = false; }
    242         clearLinkProperties();
    243         mNetlinkSocketObserver.clearNetlinkSocket();
    244     }
    245 
    246     // TODO: add a public dump() method that can be called during a bug report.
    247 
    248     private String describeWatchList() {
    249         final String delimiter = ", ";
    250         StringBuilder sb = new StringBuilder();
    251         synchronized (mLock) {
    252             sb.append("iface{" + mInterfaceName + "/" + mInterfaceIndex + "}, ");
    253             sb.append("v{" + mIpWatchListVersion + "}, ");
    254             sb.append("ntable=[");
    255             boolean firstTime = true;
    256             for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
    257                 if (firstTime) {
    258                     firstTime = false;
    259                 } else {
    260                     sb.append(delimiter);
    261                 }
    262                 sb.append(entry.getKey().getHostAddress() + "/" +
    263                         StructNdMsg.stringForNudState(entry.getValue()));
    264             }
    265             sb.append("]");
    266         }
    267         return sb.toString();
    268     }
    269 
    270     private boolean isWatching(InetAddress ip) {
    271         synchronized (mLock) {
    272             return mRunning && mIpWatchList.containsKey(ip);
    273         }
    274     }
    275 
    276     private boolean stillRunning() {
    277         synchronized (mLock) {
    278             return mRunning;
    279         }
    280     }
    281 
    282     private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) {
    283         for (RouteInfo route : routes) {
    284             if (!route.hasGateway() && route.matches(ip)) {
    285                 return true;
    286             }
    287         }
    288         return false;
    289     }
    290 
    291     private short getNeighborStateLocked(InetAddress ip) {
    292         if (mIpWatchList.containsKey(ip)) {
    293             return mIpWatchList.get(ip);
    294         }
    295         return StructNdMsg.NUD_NONE;
    296     }
    297 
    298     public void updateLinkProperties(LinkProperties lp) {
    299         if (!mInterfaceName.equals(lp.getInterfaceName())) {
    300             // TODO: figure out whether / how to cope with interface changes.
    301             Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() +
    302                     "' does not match: " + mInterfaceName);
    303             return;
    304         }
    305 
    306         synchronized (mLock) {
    307             mLinkProperties = new LinkProperties(lp);
    308             Map<InetAddress, Short> newIpWatchList = new HashMap<>();
    309 
    310             final List<RouteInfo> routes = mLinkProperties.getRoutes();
    311             for (RouteInfo route : routes) {
    312                 if (route.hasGateway()) {
    313                     InetAddress gw = route.getGateway();
    314                     if (isOnLink(routes, gw)) {
    315                         newIpWatchList.put(gw, getNeighborStateLocked(gw));
    316                     }
    317                 }
    318             }
    319 
    320             for (InetAddress nameserver : lp.getDnsServers()) {
    321                 if (isOnLink(routes, nameserver)) {
    322                     newIpWatchList.put(nameserver, getNeighborStateLocked(nameserver));
    323                 }
    324             }
    325 
    326             mIpWatchList = newIpWatchList;
    327             mIpWatchListVersion++;
    328         }
    329         if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); }
    330     }
    331 
    332     public void clearLinkProperties() {
    333         synchronized (mLock) {
    334             mLinkProperties.clear();
    335             mIpWatchList.clear();
    336             mIpWatchListVersion++;
    337         }
    338         if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); }
    339     }
    340 
    341     private void handleNeighborLost(String msg) {
    342         InetAddress ip = null;
    343         final ProvisioningChange delta;
    344         synchronized (mLock) {
    345             LinkProperties whatIfLp = new LinkProperties(mLinkProperties);
    346 
    347             for (Map.Entry<InetAddress, Short> entry : mIpWatchList.entrySet()) {
    348                 if (entry.getValue() != StructNdMsg.NUD_FAILED) {
    349                     continue;
    350                 }
    351 
    352                 ip = entry.getKey();
    353                 for (RouteInfo route : mLinkProperties.getRoutes()) {
    354                     if (ip.equals(route.getGateway())) {
    355                         whatIfLp.removeRoute(route);
    356                     }
    357                 }
    358                 whatIfLp.removeDnsServer(ip);
    359             }
    360 
    361             delta = LinkProperties.compareProvisioning(mLinkProperties, whatIfLp);
    362         }
    363 
    364         if (delta == ProvisioningChange.LOST_PROVISIONING) {
    365             final String logMsg = "FAILURE: LOST_PROVISIONING, " + msg;
    366             Log.w(TAG, logMsg);
    367             if (mCallback != null) {
    368                 // TODO: remove |ip| when the callback signature no longer has
    369                 // an InetAddress argument.
    370                 mCallback.notifyLost(ip, logMsg);
    371             }
    372         }
    373         logNudFailed(delta);
    374     }
    375 
    376     public void probeAll() {
    377         Set<InetAddress> ipProbeList = new HashSet<InetAddress>();
    378         synchronized (mLock) {
    379             ipProbeList.addAll(mIpWatchList.keySet());
    380         }
    381 
    382         if (!ipProbeList.isEmpty() && stillRunning()) {
    383             // Keep the CPU awake long enough to allow all ARP/ND
    384             // probes a reasonable chance at success. See b/23197666.
    385             //
    386             // The wakelock we use is (by default) refcounted, and this version
    387             // of acquire(timeout) queues a release message to keep acquisitions
    388             // and releases balanced.
    389             mWakeLock.acquire(getProbeWakeLockDuration());
    390         }
    391 
    392         for (InetAddress target : ipProbeList) {
    393             if (!stillRunning()) {
    394                 break;
    395             }
    396             final int returnValue = probeNeighbor(mInterfaceIndex, target);
    397             logEvent(IpReachabilityEvent.PROBE, returnValue);
    398         }
    399         mLastProbeTimeMs = SystemClock.elapsedRealtime();
    400     }
    401 
    402     private static long getProbeWakeLockDuration() {
    403         // Ideally, this would be computed by examining the values of:
    404         //
    405         //     /proc/sys/net/ipv[46]/neigh/<ifname>/ucast_solicit
    406         //
    407         // and:
    408         //
    409         //     /proc/sys/net/ipv[46]/neigh/<ifname>/retrans_time_ms
    410         //
    411         // For now, just make some assumptions.
    412         final long numUnicastProbes = 3;
    413         final long retransTimeMs = 1000;
    414         final long gracePeriodMs = 500;
    415         return (numUnicastProbes * retransTimeMs) + gracePeriodMs;
    416     }
    417 
    418     private void logEvent(int probeType, int errorCode) {
    419         int eventType = probeType | (errorCode & 0xff);
    420         mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType));
    421     }
    422 
    423     private void logNudFailed(ProvisioningChange delta) {
    424         long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs;
    425         boolean isFromProbe = (duration < getProbeWakeLockDuration());
    426         boolean isProvisioningLost = (delta == ProvisioningChange.LOST_PROVISIONING);
    427         int eventType = IpReachabilityEvent.nudFailureEventType(isFromProbe, isProvisioningLost);
    428         mMetricsLog.log(new IpReachabilityEvent(mInterfaceName, eventType));
    429     }
    430 
    431     // TODO: simplify the number of objects by making this extend Thread.
    432     private final class NetlinkSocketObserver implements Runnable {
    433         private NetlinkSocket mSocket;
    434 
    435         @Override
    436         public void run() {
    437             if (VDBG) { Log.d(TAG, "Starting observing thread."); }
    438             synchronized (mLock) { mRunning = true; }
    439 
    440             try {
    441                 setupNetlinkSocket();
    442             } catch (ErrnoException | SocketException e) {
    443                 Log.e(TAG, "Failed to suitably initialize a netlink socket", e);
    444                 synchronized (mLock) { mRunning = false; }
    445             }
    446 
    447             ByteBuffer byteBuffer;
    448             while (stillRunning()) {
    449                 try {
    450                     byteBuffer = recvKernelReply();
    451                 } catch (ErrnoException e) {
    452                     if (stillRunning()) { Log.w(TAG, "ErrnoException: ", e); }
    453                     break;
    454                 }
    455                 final long whenMs = SystemClock.elapsedRealtime();
    456                 if (byteBuffer == null) {
    457                     continue;
    458                 }
    459                 parseNetlinkMessageBuffer(byteBuffer, whenMs);
    460             }
    461 
    462             clearNetlinkSocket();
    463 
    464             synchronized (mLock) { mRunning = false; }
    465             if (VDBG) { Log.d(TAG, "Finishing observing thread."); }
    466         }
    467 
    468         private void clearNetlinkSocket() {
    469             if (mSocket != null) {
    470                 mSocket.close();
    471             }
    472         }
    473 
    474             // TODO: Refactor the main loop to recreate the socket upon recoverable errors.
    475         private void setupNetlinkSocket() throws ErrnoException, SocketException {
    476             clearNetlinkSocket();
    477             mSocket = new NetlinkSocket(OsConstants.NETLINK_ROUTE);
    478 
    479             final NetlinkSocketAddress listenAddr = new NetlinkSocketAddress(
    480                     0, OsConstants.RTMGRP_NEIGH);
    481             mSocket.bind(listenAddr);
    482 
    483             if (VDBG) {
    484                 final NetlinkSocketAddress nlAddr = mSocket.getLocalAddress();
    485                 Log.d(TAG, "bound to sockaddr_nl{"
    486                         + ((long) (nlAddr.getPortId() & 0xffffffff)) + ", "
    487                         + nlAddr.getGroupsMask()
    488                         + "}");
    489             }
    490         }
    491 
    492         private ByteBuffer recvKernelReply() throws ErrnoException {
    493             try {
    494                 return mSocket.recvMessage(0);
    495             } catch (InterruptedIOException e) {
    496                 // Interruption or other error, e.g. another thread closed our file descriptor.
    497             } catch (ErrnoException e) {
    498                 if (e.errno != OsConstants.EAGAIN) {
    499                     throw e;
    500                 }
    501             }
    502             return null;
    503         }
    504 
    505         private void parseNetlinkMessageBuffer(ByteBuffer byteBuffer, long whenMs) {
    506             while (byteBuffer.remaining() > 0) {
    507                 final int position = byteBuffer.position();
    508                 final NetlinkMessage nlMsg = NetlinkMessage.parse(byteBuffer);
    509                 if (nlMsg == null || nlMsg.getHeader() == null) {
    510                     byteBuffer.position(position);
    511                     Log.e(TAG, "unparsable netlink msg: " + NetlinkConstants.hexify(byteBuffer));
    512                     break;
    513                 }
    514 
    515                 final int srcPortId = nlMsg.getHeader().nlmsg_pid;
    516                 if (srcPortId !=  0) {
    517                     Log.e(TAG, "non-kernel source portId: " + ((long) (srcPortId & 0xffffffff)));
    518                     break;
    519                 }
    520 
    521                 if (nlMsg instanceof NetlinkErrorMessage) {
    522                     Log.e(TAG, "netlink error: " + nlMsg);
    523                     continue;
    524                 } else if (!(nlMsg instanceof RtNetlinkNeighborMessage)) {
    525                     if (DBG) {
    526                         Log.d(TAG, "non-rtnetlink neighbor msg: " + nlMsg);
    527                     }
    528                     continue;
    529                 }
    530 
    531                 evaluateRtNetlinkNeighborMessage((RtNetlinkNeighborMessage) nlMsg, whenMs);
    532             }
    533         }
    534 
    535         private void evaluateRtNetlinkNeighborMessage(
    536                 RtNetlinkNeighborMessage neighMsg, long whenMs) {
    537             final StructNdMsg ndMsg = neighMsg.getNdHeader();
    538             if (ndMsg == null || ndMsg.ndm_ifindex != mInterfaceIndex) {
    539                 return;
    540             }
    541 
    542             final InetAddress destination = neighMsg.getDestination();
    543             if (!isWatching(destination)) {
    544                 return;
    545             }
    546 
    547             final short msgType = neighMsg.getHeader().nlmsg_type;
    548             final short nudState = ndMsg.ndm_state;
    549             final String eventMsg = "NeighborEvent{"
    550                     + "elapsedMs=" + whenMs + ", "
    551                     + destination.getHostAddress() + ", "
    552                     + "[" + NetlinkConstants.hexify(neighMsg.getLinkLayerAddress()) + "], "
    553                     + NetlinkConstants.stringForNlMsgType(msgType) + ", "
    554                     + StructNdMsg.stringForNudState(nudState)
    555                     + "}";
    556 
    557             if (VDBG) {
    558                 Log.d(TAG, neighMsg.toString());
    559             } else if (DBG) {
    560                 Log.d(TAG, eventMsg);
    561             }
    562 
    563             synchronized (mLock) {
    564                 if (mIpWatchList.containsKey(destination)) {
    565                     final short value =
    566                             (msgType == NetlinkConstants.RTM_DELNEIGH)
    567                             ? StructNdMsg.NUD_NONE
    568                             : nudState;
    569                     mIpWatchList.put(destination, value);
    570                 }
    571             }
    572 
    573             if (nudState == StructNdMsg.NUD_FAILED) {
    574                 Log.w(TAG, "ALERT: " + eventMsg);
    575                 handleNeighborLost(eventMsg);
    576             }
    577         }
    578     }
    579 }
    580