Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "RouteController.h"
     18 
     19 #include "Fwmark.h"
     20 #include "UidRanges.h"
     21 
     22 #define LOG_TAG "Netd"
     23 #include "log/log.h"
     24 #include "logwrap/logwrap.h"
     25 #include "resolv_netid.h"
     26 
     27 #include <arpa/inet.h>
     28 #include <fcntl.h>
     29 #include <linux/fib_rules.h>
     30 #include <map>
     31 #include <net/if.h>
     32 #include <sys/stat.h>
     33 
     34 namespace {
     35 
// BEGIN CONSTANTS --------------------------------------------------------------------------------

// Priorities of the routing policy rules installed by this file. A numerically larger value means
// the rule has a lower priority.
const uint32_t RULE_PRIORITY_VPN_OVERRIDE_SYSTEM = 10000;
const uint32_t RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL = 11000;
const uint32_t RULE_PRIORITY_SECURE_VPN          = 12000;
const uint32_t RULE_PRIORITY_EXPLICIT_NETWORK    = 13000;
const uint32_t RULE_PRIORITY_OUTPUT_INTERFACE    = 14000;
const uint32_t RULE_PRIORITY_LEGACY_SYSTEM       = 15000;
const uint32_t RULE_PRIORITY_LEGACY_NETWORK      = 16000;
const uint32_t RULE_PRIORITY_LOCAL_NETWORK       = 17000;
const uint32_t RULE_PRIORITY_TETHERING           = 18000;
const uint32_t RULE_PRIORITY_IMPLICIT_NETWORK    = 19000;
const uint32_t RULE_PRIORITY_BYPASSABLE_VPN      = 20000;
const uint32_t RULE_PRIORITY_VPN_FALLTHROUGH     = 21000;
const uint32_t RULE_PRIORITY_DEFAULT_NETWORK     = 22000;
const uint32_t RULE_PRIORITY_DIRECTLY_CONNECTED  = 23000;
const uint32_t RULE_PRIORITY_UNREACHABLE         = 32000;

// Routing table numbers with fixed values. (Per-interface tables are instead derived from the
// interface index; see getRouteTableForInterface().)
const uint32_t ROUTE_TABLE_LOCAL_NETWORK  = 97;
const uint32_t ROUTE_TABLE_LEGACY_NETWORK = 98;
const uint32_t ROUTE_TABLE_LEGACY_SYSTEM  = 99;

// Names recorded in RT_TABLES_PATH for the fixed tables above.
const char* const ROUTE_TABLE_NAME_LOCAL_NETWORK  = "local_network";
const char* const ROUTE_TABLE_NAME_LEGACY_NETWORK = "legacy_network";
const char* const ROUTE_TABLE_NAME_LEGACY_SYSTEM  = "legacy_system";

// Names recorded in RT_TABLES_PATH for RT_TABLE_LOCAL and RT_TABLE_MAIN.
const char* const ROUTE_TABLE_NAME_LOCAL = "local";
const char* const ROUTE_TABLE_NAME_MAIN  = "main";

// TODO: These values aren't defined by the Linux kernel, because our UID routing changes are not
// upstream (yet?), so we can't just pick them up from kernel headers. When (if?) the changes make
// it upstream, we'll remove this and rely on the kernel header values. For now, add a static assert
// that will warn us if upstream has given these values some other meaning.
const uint16_t FRA_UID_START = 18;
const uint16_t FRA_UID_END   = 19;
static_assert(FRA_UID_START > FRA_MAX,
             "Android-specific FRA_UID_{START,END} values also assigned in Linux uapi. "
             "Check that these values match what the kernel does and then update this assertion.");

// Netlink header flags. Every request asks for an ack; requests that add a rule or route also set
// NLM_F_CREATE | NLM_F_EXCL.
const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
const uint16_t NETLINK_CREATE_REQUEST_FLAGS = NETLINK_REQUEST_FLAGS | NLM_F_CREATE | NLM_F_EXCL;

// Address that sendNetlinkRequest() connects its netlink socket to.
const sockaddr_nl NETLINK_ADDRESS = {AF_NETLINK, 0, 0, 0};

// Address families for which modifyIpRule() sends each rule request.
const uint8_t AF_FAMILIES[] = {AF_INET, AF_INET6};

const char* const IP_VERSIONS[] = {"-4", "-6"};

const uid_t UID_ROOT = 0;
// Sentinels meaning "no incoming interface selector" / "no outgoing interface selector".
const char* const IIF_NONE = NULL;
const char* const OIF_NONE = NULL;
// Readable names for the boolean |add| parameters taken by the helpers in this file.
const bool ACTION_ADD = true;
const bool ACTION_DEL = false;
const bool MODIFY_NON_UID_BASED_RULES = true;

// Location, open() flags and permission bits used by updateTableNamesFile().
const char* const RT_TABLES_PATH = "/data/misc/net/rt_tables";
const int RT_TABLES_FLAGS = O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC;
const mode_t RT_TABLES_MODE = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;  // mode 0644, rw-r--r--

const unsigned ROUTE_FLUSH_ATTEMPTS = 2;
     96 
     97 // Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
     98 // warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
     99 constexpr uint16_t U16_RTA_LENGTH(uint16_t x) {
    100     return RTA_LENGTH(x);
    101 }
    102 
// Pre-built attribute headers for fixed-size payloads. In the iovec arrays assembled by
// modifyIpRule() and modifyIpRoute(), each header is immediately followed by its payload.
//
// These are practically const, but can't be declared so, because they are used to initialize
// non-const pointers ("void* iov_base") in iovec arrays.
rtattr FRATTR_PRIORITY  = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_PRIORITY };
rtattr FRATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_TABLE };
rtattr FRATTR_FWMARK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMARK };
rtattr FRATTR_FWMASK    = { U16_RTA_LENGTH(sizeof(uint32_t)), FRA_FWMASK };
rtattr FRATTR_UID_START = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_START };
rtattr FRATTR_UID_END   = { U16_RTA_LENGTH(sizeof(uid_t)),    FRA_UID_END };

rtattr RTATTR_TABLE     = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_TABLE };
rtattr RTATTR_OIF       = { U16_RTA_LENGTH(sizeof(uint32_t)), RTA_OIF };

// Zero bytes appended after an interface-name attribute to pad it out to the attribute alignment
// (the number of bytes actually used is computed by padInterfaceName()).
uint8_t PADDING_BUFFER[RTA_ALIGNTO] = {0, 0, 0, 0};

// END CONSTANTS ----------------------------------------------------------------------------------
    118 
// Remembers the routing table number last computed for each interface name, so that the table can
// still be found after the interface itself has disappeared (see getRouteTableForInterface()).
// No locks needed because RouteController is accessed only from one thread (in CommandListener).
std::map<std::string, uint32_t> interfaceToTable;
    121 
    122 uint32_t getRouteTableForInterface(const char* interface) {
    123     uint32_t index = if_nametoindex(interface);
    124     if (index) {
    125         index += RouteController::ROUTE_TABLE_OFFSET_FROM_INDEX;
    126         interfaceToTable[interface] = index;
    127         return index;
    128     }
    129     // If the interface goes away if_nametoindex() will return 0 but we still need to know
    130     // the index so we can remove the rules and routes.
    131     auto iter = interfaceToTable.find(interface);
    132     if (iter == interfaceToTable.end()) {
    133         ALOGE("cannot find interface %s", interface);
    134         return RT_TABLE_UNSPEC;
    135     }
    136     return iter->second;
    137 }
    138 
    139 void addTableName(uint32_t table, const std::string& name, std::string* contents) {
    140     char tableString[UINT32_STRLEN];
    141     snprintf(tableString, sizeof(tableString), "%u", table);
    142     *contents += tableString;
    143     *contents += " ";
    144     *contents += name;
    145     *contents += "\n";
    146 }
    147 
    148 // Doesn't return success/failure as the file is optional; it's okay if we fail to update it.
    149 void updateTableNamesFile() {
    150     std::string contents;
    151 
    152     addTableName(RT_TABLE_LOCAL, ROUTE_TABLE_NAME_LOCAL, &contents);
    153     addTableName(RT_TABLE_MAIN,  ROUTE_TABLE_NAME_MAIN,  &contents);
    154 
    155     addTableName(ROUTE_TABLE_LOCAL_NETWORK,  ROUTE_TABLE_NAME_LOCAL_NETWORK,  &contents);
    156     addTableName(ROUTE_TABLE_LEGACY_NETWORK, ROUTE_TABLE_NAME_LEGACY_NETWORK, &contents);
    157     addTableName(ROUTE_TABLE_LEGACY_SYSTEM,  ROUTE_TABLE_NAME_LEGACY_SYSTEM,  &contents);
    158 
    159     for (const auto& entry : interfaceToTable) {
    160         addTableName(entry.second, entry.first, &contents);
    161     }
    162 
    163     int fd = open(RT_TABLES_PATH, RT_TABLES_FLAGS, RT_TABLES_MODE);
    164     if (fd == -1) {
    165         ALOGE("failed to create %s (%s)", RT_TABLES_PATH, strerror(errno));
    166         return;
    167     }
    168     // File creation is affected by umask, so make sure the right mode bits are set.
    169     if (fchmod(fd, RT_TABLES_MODE) == -1) {
    170         ALOGE("failed to set mode 0%o on %s (%s)", RT_TABLES_MODE, RT_TABLES_PATH, strerror(errno));
    171     }
    172     ssize_t bytesWritten = write(fd, contents.data(), contents.size());
    173     if (bytesWritten != static_cast<ssize_t>(contents.size())) {
    174         ALOGE("failed to write to %s (%zd vs %zu bytes) (%s)", RT_TABLES_PATH, bytesWritten,
    175               contents.size(), strerror(errno));
    176     }
    177     close(fd);
    178 }
    179 
    180 // Sends a netlink request and expects an ack.
    181 // |iov| is an array of struct iovec that contains the netlink message payload.
    182 // The netlink header is generated by this function based on |action| and |flags|.
    183 // Returns -errno if there was an error or if the kernel reported an error.
    184 WARN_UNUSED_RESULT int sendNetlinkRequest(uint16_t action, uint16_t flags, iovec* iov, int iovlen) {
    185     nlmsghdr nlmsg = {
    186         .nlmsg_type = action,
    187         .nlmsg_flags = flags,
    188     };
    189     iov[0].iov_base = &nlmsg;
    190     iov[0].iov_len = sizeof(nlmsg);
    191     for (int i = 0; i < iovlen; ++i) {
    192         nlmsg.nlmsg_len += iov[i].iov_len;
    193     }
    194 
    195     int ret;
    196     struct {
    197         nlmsghdr msg;
    198         nlmsgerr err;
    199     } response;
    200 
    201     int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
    202     if (sock != -1 &&
    203             connect(sock, reinterpret_cast<const sockaddr*>(&NETLINK_ADDRESS),
    204                     sizeof(NETLINK_ADDRESS)) != -1 &&
    205             writev(sock, iov, iovlen) != -1 &&
    206             (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
    207         if (ret == sizeof(response)) {
    208             ret = response.err.error;  // Netlink errors are negative errno.
    209             if (ret) {
    210                 ALOGE("netlink response contains error (%s)", strerror(-ret));
    211             }
    212         } else {
    213             ALOGE("bad netlink response message size (%d != %zu)", ret, sizeof(response));
    214             ret = -EBADMSG;
    215         }
    216     } else {
    217         ALOGE("netlink socket/connect/writev/recv failed (%s)", strerror(errno));
    218         ret = -errno;
    219     }
    220 
    221     if (sock != -1) {
    222         close(sock);
    223     }
    224 
    225     return ret;
    226 }
    227 
    228 // Returns 0 on success or negative errno on failure.
    229 int padInterfaceName(const char* input, char* name, size_t* length, uint16_t* padding) {
    230     if (!input) {
    231         *length = 0;
    232         *padding = 0;
    233         return 0;
    234     }
    235     *length = strlcpy(name, input, IFNAMSIZ) + 1;
    236     if (*length > IFNAMSIZ) {
    237         ALOGE("interface name too long (%zu > %u)", *length, IFNAMSIZ);
    238         return -ENAMETOOLONG;
    239     }
    240     *padding = RTA_SPACE(*length) - RTA_LENGTH(*length);
    241     return 0;
    242 }
    243 
    244 // Adds or removes a routing rule for IPv4 and IPv6.
    245 //
    246 // + If |table| is non-zero, the rule points at the specified routing table. Otherwise, the rule
    247 //   returns ENETUNREACH.
    248 // + If |mask| is non-zero, the rule matches the specified fwmark and mask. Otherwise, |fwmark| is
    249 //   ignored.
    250 // + If |iif| is non-NULL, the rule matches the specified incoming interface.
    251 // + If |oif| is non-NULL, the rule matches the specified outgoing interface.
    252 // + If |uidStart| and |uidEnd| are not INVALID_UID, the rule matches packets from UIDs in that
    253 //   range (inclusive). Otherwise, the rule matches packets from all UIDs.
    254 //
    255 // Returns 0 on success or negative errno on failure.
    256 WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
    257                                     uint32_t fwmark, uint32_t mask, const char* iif,
    258                                     const char* oif, uid_t uidStart, uid_t uidEnd) {
    259     // Ensure that if you set a bit in the fwmark, it's not being ignored by the mask.
    260     if (fwmark & ~mask) {
    261         ALOGE("mask 0x%x does not select all the bits set in fwmark 0x%x", mask, fwmark);
    262         return -ERANGE;
    263     }
    264 
    265     // Interface names must include exactly one terminating NULL and be properly padded, or older
    266     // kernels will refuse to delete rules.
    267     char iifName[IFNAMSIZ], oifName[IFNAMSIZ];
    268     size_t iifLength, oifLength;
    269     uint16_t iifPadding, oifPadding;
    270     if (int ret = padInterfaceName(iif, iifName, &iifLength, &iifPadding)) {
    271         return ret;
    272     }
    273     if (int ret = padInterfaceName(oif, oifName, &oifLength, &oifPadding)) {
    274         return ret;
    275     }
    276 
    277     // Either both start and end UID must be specified, or neither.
    278     if ((uidStart == INVALID_UID) != (uidEnd == INVALID_UID)) {
    279         ALOGE("incompatible start and end UIDs (%u vs %u)", uidStart, uidEnd);
    280         return -EUSERS;
    281     }
    282     bool isUidRule = (uidStart != INVALID_UID);
    283 
    284     // Assemble a rule request and put it in an array of iovec structures.
    285     fib_rule_hdr rule = {
    286         .action = static_cast<uint8_t>(table != RT_TABLE_UNSPEC ? FR_ACT_TO_TBL :
    287                                                                   FR_ACT_UNREACHABLE),
    288     };
    289 
    290     rtattr fraIifName = { U16_RTA_LENGTH(iifLength), FRA_IIFNAME };
    291     rtattr fraOifName = { U16_RTA_LENGTH(oifLength), FRA_OIFNAME };
    292 
    293     iovec iov[] = {
    294         { NULL,              0 },
    295         { &rule,             sizeof(rule) },
    296         { &FRATTR_PRIORITY,  sizeof(FRATTR_PRIORITY) },
    297         { &priority,         sizeof(priority) },
    298         { &FRATTR_TABLE,     table != RT_TABLE_UNSPEC ? sizeof(FRATTR_TABLE) : 0 },
    299         { &table,            table != RT_TABLE_UNSPEC ? sizeof(table) : 0 },
    300         { &FRATTR_FWMARK,    mask ? sizeof(FRATTR_FWMARK) : 0 },
    301         { &fwmark,           mask ? sizeof(fwmark) : 0 },
    302         { &FRATTR_FWMASK,    mask ? sizeof(FRATTR_FWMASK) : 0 },
    303         { &mask,             mask ? sizeof(mask) : 0 },
    304         { &FRATTR_UID_START, isUidRule ? sizeof(FRATTR_UID_START) : 0 },
    305         { &uidStart,         isUidRule ? sizeof(uidStart) : 0 },
    306         { &FRATTR_UID_END,   isUidRule ? sizeof(FRATTR_UID_END) : 0 },
    307         { &uidEnd,           isUidRule ? sizeof(uidEnd) : 0 },
    308         { &fraIifName,       iif != IIF_NONE ? sizeof(fraIifName) : 0 },
    309         { iifName,           iifLength },
    310         { PADDING_BUFFER,    iifPadding },
    311         { &fraOifName,       oif != OIF_NONE ? sizeof(fraOifName) : 0 },
    312         { oifName,           oifLength },
    313         { PADDING_BUFFER,    oifPadding },
    314     };
    315 
    316     uint16_t flags = (action == RTM_NEWRULE) ? NETLINK_CREATE_REQUEST_FLAGS : NETLINK_REQUEST_FLAGS;
    317     for (size_t i = 0; i < ARRAY_SIZE(AF_FAMILIES); ++i) {
    318         rule.family = AF_FAMILIES[i];
    319         if (int ret = sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov))) {
    320             return ret;
    321         }
    322     }
    323 
    324     return 0;
    325 }
    326 
    327 WARN_UNUSED_RESULT int modifyIpRule(uint16_t action, uint32_t priority, uint32_t table,
    328                                     uint32_t fwmark, uint32_t mask) {
    329     return modifyIpRule(action, priority, table, fwmark, mask, IIF_NONE, OIF_NONE, INVALID_UID,
    330                         INVALID_UID);
    331 }
    332 
    333 // Adds or deletes an IPv4 or IPv6 route.
    334 // Returns 0 on success or negative errno on failure.
    335 WARN_UNUSED_RESULT int modifyIpRoute(uint16_t action, uint32_t table, const char* interface,
    336                                      const char* destination, const char* nexthop) {
    337     // At least the destination must be non-null.
    338     if (!destination) {
    339         ALOGE("null destination");
    340         return -EFAULT;
    341     }
    342 
    343     // Parse the prefix.
    344     uint8_t rawAddress[sizeof(in6_addr)];
    345     uint8_t family;
    346     uint8_t prefixLength;
    347     int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
    348                                 &prefixLength);
    349     if (rawLength < 0) {
    350         ALOGE("parsePrefix failed for destination %s (%s)", destination, strerror(-rawLength));
    351         return rawLength;
    352     }
    353 
    354     if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
    355         ALOGE("impossible! address too long (%d vs %zu)", rawLength, sizeof(rawAddress));
    356         return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
    357     }
    358 
    359     uint8_t type = RTN_UNICAST;
    360     uint32_t ifindex;
    361     uint8_t rawNexthop[sizeof(in6_addr)];
    362 
    363     if (nexthop && !strcmp(nexthop, "unreachable")) {
    364         type = RTN_UNREACHABLE;
    365         // 'interface' is likely non-NULL, as the caller (modifyRoute()) likely used it to lookup
    366         // the table number. But it's an error to specify an interface ("dev ...") or a nexthop for
    367         // unreachable routes, so nuke them. (IPv6 allows them to be specified; IPv4 doesn't.)
    368         interface = OIF_NONE;
    369         nexthop = NULL;
    370     } else if (nexthop && !strcmp(nexthop, "throw")) {
    371         type = RTN_THROW;
    372         interface = OIF_NONE;
    373         nexthop = NULL;
    374     } else {
    375         // If an interface was specified, find the ifindex.
    376         if (interface != OIF_NONE) {
    377             ifindex = if_nametoindex(interface);
    378             if (!ifindex) {
    379                 ALOGE("cannot find interface %s", interface);
    380                 return -ENODEV;
    381             }
    382         }
    383 
    384         // If a nexthop was specified, parse it as the same family as the prefix.
    385         if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
    386             ALOGE("inet_pton failed for nexthop %s", nexthop);
    387             return -EINVAL;
    388         }
    389     }
    390 
    391     // Assemble a rtmsg and put it in an array of iovec structures.
    392     rtmsg route = {
    393         .rtm_protocol = RTPROT_STATIC,
    394         .rtm_type = type,
    395         .rtm_family = family,
    396         .rtm_dst_len = prefixLength,
    397         .rtm_scope = static_cast<uint8_t>(nexthop ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK),
    398     };
    399 
    400     rtattr rtaDst     = { U16_RTA_LENGTH(rawLength), RTA_DST };
    401     rtattr rtaGateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
    402 
    403     iovec iov[] = {
    404         { NULL,          0 },
    405         { &route,        sizeof(route) },
    406         { &RTATTR_TABLE, sizeof(RTATTR_TABLE) },
    407         { &table,        sizeof(table) },
    408         { &rtaDst,       sizeof(rtaDst) },
    409         { rawAddress,    static_cast<size_t>(rawLength) },
    410         { &RTATTR_OIF,   interface != OIF_NONE ? sizeof(RTATTR_OIF) : 0 },
    411         { &ifindex,      interface != OIF_NONE ? sizeof(ifindex) : 0 },
    412         { &rtaGateway,   nexthop ? sizeof(rtaGateway) : 0 },
    413         { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
    414     };
    415 
    416     uint16_t flags = (action == RTM_NEWROUTE) ? NETLINK_CREATE_REQUEST_FLAGS :
    417                                                 NETLINK_REQUEST_FLAGS;
    418     return sendNetlinkRequest(action, flags, iov, ARRAY_SIZE(iov));
    419 }
    420 
    421 // An iptables rule to mark incoming packets on a network with the netId of the network.
    422 //
    423 // This is so that the kernel can:
    424 // + Use the right fwmark for (and thus correctly route) replies (e.g.: TCP RST, ICMP errors, ping
    425 //   replies, SYN-ACKs, etc).
    426 // + Mark sockets that accept connections from this interface so that the connection stays on the
    427 //   same interface.
    428 WARN_UNUSED_RESULT int modifyIncomingPacketMark(unsigned netId, const char* interface,
    429                                                 Permission permission, bool add) {
    430     Fwmark fwmark;
    431 
    432     fwmark.netId = netId;
    433     fwmark.explicitlySelected = true;
    434     fwmark.protectedFromVpn = true;
    435     fwmark.permission = permission;
    436 
    437     char markString[UINT32_HEX_STRLEN];
    438     snprintf(markString, sizeof(markString), "0x%x", fwmark.intValue);
    439 
    440     if (execIptables(V4V6, "-t", "mangle", add ? "-A" : "-D", "INPUT", "-i", interface, "-j",
    441                      "MARK", "--set-mark", markString, NULL)) {
    442         ALOGE("failed to change iptables rule that sets incoming packet mark");
    443         return -EREMOTEIO;
    444     }
    445 
    446     return 0;
    447 }
    448 
    449 // A rule to route responses to the local network forwarded via the VPN.
    450 //
    451 // When a VPN is in effect, packets from the local network to upstream networks are forwarded into
    452 // the VPN's tunnel interface. When the VPN forwards the responses, they emerge out of the tunnel.
    453 WARN_UNUSED_RESULT int modifyVpnOutputToLocalRule(const char* vpnInterface, bool add) {
    454     return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_VPN_OUTPUT_TO_LOCAL,
    455                         ROUTE_TABLE_LOCAL_NETWORK, MARK_UNSET, MARK_UNSET, vpnInterface, OIF_NONE,
    456                         INVALID_UID, INVALID_UID);
    457 }
    458 
    459 // A rule to route all traffic from a given set of UIDs to go over the VPN.
    460 //
    461 // Notice that this rule doesn't use the netId. I.e., no matter what netId the user's socket may
    462 // have, if they are subject to this VPN, their traffic has to go through it. Allows the traffic to
    463 // bypass the VPN if the protectedFromVpn bit is set.
    464 WARN_UNUSED_RESULT int modifyVpnUidRangeRule(uint32_t table, uid_t uidStart, uid_t uidEnd,
    465                                              bool secure, bool add) {
    466     Fwmark fwmark;
    467     Fwmark mask;
    468 
    469     fwmark.protectedFromVpn = false;
    470     mask.protectedFromVpn = true;
    471 
    472     uint32_t priority;
    473 
    474     if (secure) {
    475         priority = RULE_PRIORITY_SECURE_VPN;
    476     } else {
    477         priority = RULE_PRIORITY_BYPASSABLE_VPN;
    478 
    479         fwmark.explicitlySelected = false;
    480         mask.explicitlySelected = true;
    481     }
    482 
    483     return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
    484                         mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
    485 }
    486 
    487 // A rule to allow system apps to send traffic over this VPN even if they are not part of the target
    488 // set of UIDs.
    489 //
    490 // This is needed for DnsProxyListener to correctly resolve a request for a user who is in the
    491 // target set, but where the DnsProxyListener itself is not.
    492 WARN_UNUSED_RESULT int modifyVpnSystemPermissionRule(unsigned netId, uint32_t table, bool secure,
    493                                                      bool add) {
    494     Fwmark fwmark;
    495     Fwmark mask;
    496 
    497     fwmark.netId = netId;
    498     mask.netId = FWMARK_NET_ID_MASK;
    499 
    500     fwmark.permission = PERMISSION_SYSTEM;
    501     mask.permission = PERMISSION_SYSTEM;
    502 
    503     uint32_t priority = secure ? RULE_PRIORITY_SECURE_VPN : RULE_PRIORITY_BYPASSABLE_VPN;
    504 
    505     return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, priority, table, fwmark.intValue,
    506                         mask.intValue);
    507 }
    508 
    509 // A rule to route traffic based on an explicitly chosen network.
    510 //
    511 // Supports apps that use the multinetwork APIs to restrict their traffic to a network.
    512 //
    513 // Even though we check permissions at the time we set a netId into the fwmark of a socket, we need
    514 // to check it again in the rules here, because a network's permissions may have been updated via
    515 // modifyNetworkPermission().
    516 WARN_UNUSED_RESULT int modifyExplicitNetworkRule(unsigned netId, uint32_t table,
    517                                                  Permission permission, uid_t uidStart,
    518                                                  uid_t uidEnd, bool add) {
    519     Fwmark fwmark;
    520     Fwmark mask;
    521 
    522     fwmark.netId = netId;
    523     mask.netId = FWMARK_NET_ID_MASK;
    524 
    525     fwmark.explicitlySelected = true;
    526     mask.explicitlySelected = true;
    527 
    528     fwmark.permission = permission;
    529     mask.permission = permission;
    530 
    531     return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_EXPLICIT_NETWORK, table,
    532                         fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, uidStart, uidEnd);
    533 }
    534 
    535 // A rule to route traffic based on a chosen outgoing interface.
    536 //
    537 // Supports apps that use SO_BINDTODEVICE or IP_PKTINFO options and the kernel that already knows
    538 // the outgoing interface (typically for link-local communications).
    539 WARN_UNUSED_RESULT int modifyOutputInterfaceRule(const char* interface, uint32_t table,
    540                                                  Permission permission, uid_t uidStart,
    541                                                  uid_t uidEnd, bool add) {
    542     Fwmark fwmark;
    543     Fwmark mask;
    544 
    545     fwmark.permission = permission;
    546     mask.permission = permission;
    547 
    548     return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_OUTPUT_INTERFACE, table,
    549                         fwmark.intValue, mask.intValue, IIF_NONE, interface, uidStart, uidEnd);
    550 }
    551 
    552 // A rule to route traffic based on the chosen network.
    553 //
    554 // This is for sockets that have not explicitly requested a particular network, but have been
    555 // bound to one when they called connect(). This ensures that sockets connected on a particular
    556 // network stay on that network even if the default network changes.
    557 WARN_UNUSED_RESULT int modifyImplicitNetworkRule(unsigned netId, uint32_t table,
    558                                                  Permission permission, bool add) {
    559     Fwmark fwmark;
    560     Fwmark mask;
    561 
    562     fwmark.netId = netId;
    563     mask.netId = FWMARK_NET_ID_MASK;
    564 
    565     fwmark.explicitlySelected = false;
    566     mask.explicitlySelected = true;
    567 
    568     fwmark.permission = permission;
    569     mask.permission = permission;
    570 
    571     return modifyIpRule(add ? RTM_NEWRULE : RTM_DELRULE, RULE_PRIORITY_IMPLICIT_NETWORK, table,
    572                         fwmark.intValue, mask.intValue);
    573 }
    574 
    575 // A rule to enable split tunnel VPNs.
    576 //
    577 // If a packet with a VPN's netId doesn't find a route in the VPN's routing table, it's allowed to
    578 // go over the default network, provided it wasn't explicitly restricted to the VPN and has the
    579 // permissions required by the default network.
    580 WARN_UNUSED_RESULT int modifyVpnFallthroughRule(uint16_t action, unsigned vpnNetId,
    581                                                 const char* physicalInterface,
    582                                                 Permission permission) {
    583     uint32_t table = getRouteTableForInterface(physicalInterface);
    584     if (table == RT_TABLE_UNSPEC) {
    585         return -ESRCH;
    586     }
    587 
    588     Fwmark fwmark;
    589     Fwmark mask;
    590 
    591     fwmark.netId = vpnNetId;
    592     mask.netId = FWMARK_NET_ID_MASK;
    593 
    594     fwmark.explicitlySelected = false;
    595     mask.explicitlySelected = true;
    596 
    597     fwmark.permission = permission;
    598     mask.permission = permission;
    599 
    600     return modifyIpRule(action, RULE_PRIORITY_VPN_FALLTHROUGH, table, fwmark.intValue,
    601                         mask.intValue);
    602 }
    603 
    604 // Add rules to allow legacy routes added through the requestRouteToHost() API.
    605 WARN_UNUSED_RESULT int addLegacyRouteRules() {
    606     Fwmark fwmark;
    607     Fwmark mask;
    608 
    609     fwmark.explicitlySelected = false;
    610     mask.explicitlySelected = true;
    611 
    612     // Rules to allow legacy routes to override the default network.
    613     if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
    614                                fwmark.intValue, mask.intValue)) {
    615         return ret;
    616     }
    617     if (int ret = modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LEGACY_NETWORK,
    618                                ROUTE_TABLE_LEGACY_NETWORK, fwmark.intValue, mask.intValue)) {
    619         return ret;
    620     }
    621 
    622     fwmark.permission = PERMISSION_SYSTEM;
    623     mask.permission = PERMISSION_SYSTEM;
    624 
    625     // A rule to allow legacy routes from system apps to override VPNs.
    626     return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_VPN_OVERRIDE_SYSTEM, ROUTE_TABLE_LEGACY_SYSTEM,
    627                         fwmark.intValue, mask.intValue);
    628 }
    629 
    630 // Add rules to lookup the local network when specified explicitly or otherwise.
    631 WARN_UNUSED_RESULT int addLocalNetworkRules(unsigned localNetId) {
    632     if (int ret = modifyExplicitNetworkRule(localNetId, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
    633                                             INVALID_UID, INVALID_UID, ACTION_ADD)) {
    634         return ret;
    635     }
    636 
    637     Fwmark fwmark;
    638     Fwmark mask;
    639 
    640     fwmark.explicitlySelected = false;
    641     mask.explicitlySelected = true;
    642 
    643     return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_LOCAL_NETWORK, ROUTE_TABLE_LOCAL_NETWORK,
    644                         fwmark.intValue, mask.intValue);
    645 }
    646 
    647 // Add a new rule to look up the 'main' table, with the same selectors as the "default network"
    648 // rule, but with a lower priority. We will never create routes in the main table; it should only be
    649 // used for directly-connected routes implicitly created by the kernel when adding IP addresses.
    650 // This is necessary, for example, when adding a route through a directly-connected gateway: in
    651 // order to add the route, there must already be a directly-connected route that covers the gateway.
    652 WARN_UNUSED_RESULT int addDirectlyConnectedRule() {
    653     Fwmark fwmark;
    654     Fwmark mask;
    655 
    656     fwmark.netId = NETID_UNSET;
    657     mask.netId = FWMARK_NET_ID_MASK;
    658 
    659     return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_DIRECTLY_CONNECTED, RT_TABLE_MAIN,
    660                         fwmark.intValue, mask.intValue, IIF_NONE, OIF_NONE, UID_ROOT, UID_ROOT);
    661 }
    662 
    663 // Add an explicit unreachable rule close to the end of the prioriy list to make it clear that
    664 // relying on the kernel-default "from all lookup main" rule at priority 32766 is not intended
    665 // behaviour. We do flush the kernel-default rules at startup, but having an explicit unreachable
    666 // rule will hopefully make things even clearer.
    667 WARN_UNUSED_RESULT int addUnreachableRule() {
    668     return modifyIpRule(RTM_NEWRULE, RULE_PRIORITY_UNREACHABLE, RT_TABLE_UNSPEC, MARK_UNSET,
    669                         MARK_UNSET);
    670 }
    671 
    672 WARN_UNUSED_RESULT int modifyLocalNetwork(unsigned netId, const char* interface, bool add) {
    673     if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
    674         return ret;
    675     }
    676     return modifyOutputInterfaceRule(interface, ROUTE_TABLE_LOCAL_NETWORK, PERMISSION_NONE,
    677                                      INVALID_UID, INVALID_UID, add);
    678 }
    679 
    680 WARN_UNUSED_RESULT int modifyPhysicalNetwork(unsigned netId, const char* interface,
    681                                              Permission permission, bool add) {
    682     uint32_t table = getRouteTableForInterface(interface);
    683     if (table == RT_TABLE_UNSPEC) {
    684         return -ESRCH;
    685     }
    686 
    687     if (int ret = modifyIncomingPacketMark(netId, interface, permission, add)) {
    688         return ret;
    689     }
    690     if (int ret = modifyExplicitNetworkRule(netId, table, permission, INVALID_UID, INVALID_UID,
    691                                             add)) {
    692         return ret;
    693     }
    694     if (int ret = modifyOutputInterfaceRule(interface, table, permission, INVALID_UID, INVALID_UID,
    695                                             add)) {
    696         return ret;
    697     }
    698     return modifyImplicitNetworkRule(netId, table, permission, add);
    699 }
    700 
    701 WARN_UNUSED_RESULT int modifyVirtualNetwork(unsigned netId, const char* interface,
    702                                             const UidRanges& uidRanges, bool secure, bool add,
    703                                             bool modifyNonUidBasedRules) {
    704     uint32_t table = getRouteTableForInterface(interface);
    705     if (table == RT_TABLE_UNSPEC) {
    706         return -ESRCH;
    707     }
    708 
    709     for (const UidRanges::Range& range : uidRanges.getRanges()) {
    710         if (int ret = modifyVpnUidRangeRule(table, range.first, range.second, secure, add)) {
    711             return ret;
    712         }
    713         if (int ret = modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, range.first,
    714                                                 range.second, add)) {
    715             return ret;
    716         }
    717         if (int ret = modifyOutputInterfaceRule(interface, table, PERMISSION_NONE, range.first,
    718                                                 range.second, add)) {
    719             return ret;
    720         }
    721     }
    722 
    723     if (modifyNonUidBasedRules) {
    724         if (int ret = modifyIncomingPacketMark(netId, interface, PERMISSION_NONE, add)) {
    725             return ret;
    726         }
    727         if (int ret = modifyVpnOutputToLocalRule(interface, add)) {
    728             return ret;
    729         }
    730         if (int ret = modifyVpnSystemPermissionRule(netId, table, secure, add)) {
    731             return ret;
    732         }
    733         return modifyExplicitNetworkRule(netId, table, PERMISSION_NONE, UID_ROOT, UID_ROOT, add);
    734     }
    735 
    736     return 0;
    737 }
    738 
    739 WARN_UNUSED_RESULT int modifyDefaultNetwork(uint16_t action, const char* interface,
    740                                             Permission permission) {
    741     uint32_t table = getRouteTableForInterface(interface);
    742     if (table == RT_TABLE_UNSPEC) {
    743         return -ESRCH;
    744     }
    745 
    746     Fwmark fwmark;
    747     Fwmark mask;
    748 
    749     fwmark.netId = NETID_UNSET;
    750     mask.netId = FWMARK_NET_ID_MASK;
    751 
    752     fwmark.permission = permission;
    753     mask.permission = permission;
    754 
    755     return modifyIpRule(action, RULE_PRIORITY_DEFAULT_NETWORK, table, fwmark.intValue,
    756                         mask.intValue);
    757 }
    758 
    759 WARN_UNUSED_RESULT int modifyTetheredNetwork(uint16_t action, const char* inputInterface,
    760                                              const char* outputInterface) {
    761     uint32_t table = getRouteTableForInterface(outputInterface);
    762     if (table == RT_TABLE_UNSPEC) {
    763         return -ESRCH;
    764     }
    765 
    766     return modifyIpRule(action, RULE_PRIORITY_TETHERING, table, MARK_UNSET, MARK_UNSET,
    767                         inputInterface, OIF_NONE, INVALID_UID, INVALID_UID);
    768 }
    769 
    770 // Returns 0 on success or negative errno on failure.
    771 WARN_UNUSED_RESULT int flushRules() {
    772     for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
    773         const char* argv[] = {
    774             IP_PATH,
    775             IP_VERSIONS[i],
    776             "rule",
    777             "flush",
    778         };
    779         if (android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv), NULL, false, false)) {
    780             ALOGE("failed to flush rules");
    781             return -EREMOTEIO;
    782         }
    783     }
    784     return 0;
    785 }
    786 
    787 // Adds or removes an IPv4 or IPv6 route to the specified table and, if it's a directly-connected
    788 // route, to the main table as well.
    789 // Returns 0 on success or negative errno on failure.
    790 WARN_UNUSED_RESULT int modifyRoute(uint16_t action, const char* interface, const char* destination,
    791                                    const char* nexthop, RouteController::TableType tableType) {
    792     uint32_t table;
    793     switch (tableType) {
    794         case RouteController::INTERFACE: {
    795             table = getRouteTableForInterface(interface);
    796             if (table == RT_TABLE_UNSPEC) {
    797                 return -ESRCH;
    798             }
    799             break;
    800         }
    801         case RouteController::LOCAL_NETWORK: {
    802             table = ROUTE_TABLE_LOCAL_NETWORK;
    803             break;
    804         }
    805         case RouteController::LEGACY_NETWORK: {
    806             table = ROUTE_TABLE_LEGACY_NETWORK;
    807             break;
    808         }
    809         case RouteController::LEGACY_SYSTEM: {
    810             table = ROUTE_TABLE_LEGACY_SYSTEM;
    811             break;
    812         }
    813     }
    814 
    815     int ret = modifyIpRoute(action, table, interface, destination, nexthop);
    816     // Trying to add a route that already exists shouldn't cause an error.
    817     if (ret && !(action == RTM_NEWROUTE && ret == -EEXIST)) {
    818         return ret;
    819     }
    820 
    821     return 0;
    822 }
    823 
    824 // Returns 0 on success or negative errno on failure.
    825 WARN_UNUSED_RESULT int flushRoutes(const char* interface) {
    826     uint32_t table = getRouteTableForInterface(interface);
    827     if (table == RT_TABLE_UNSPEC) {
    828         return -ESRCH;
    829     }
    830 
    831     char tableString[UINT32_STRLEN];
    832     snprintf(tableString, sizeof(tableString), "%u", table);
    833 
    834     int ret = 0;
    835     for (size_t i = 0; i < ARRAY_SIZE(IP_VERSIONS); ++i) {
    836         const char* argv[] = {
    837             IP_PATH,
    838             IP_VERSIONS[i],
    839             "route",
    840             "flush",
    841             "table",
    842             tableString,
    843         };
    844 
    845         // A flush works by dumping routes and deleting each route as it's returned, and it can
    846         // fail if something else deletes the route between the dump and the delete. This can
    847         // happen, for example, if an interface goes down while we're trying to flush its routes.
    848         // So try multiple times and only return an error if the last attempt fails.
    849         //
    850         // TODO: replace this with our own netlink code.
    851         unsigned attempts = 0;
    852         int err;
    853         do {
    854             err = android_fork_execvp(ARRAY_SIZE(argv), const_cast<char**>(argv),
    855                                       NULL, false, false);
    856             ++attempts;
    857         } while (err != 0 && attempts < ROUTE_FLUSH_ATTEMPTS);
    858         if (err) {
    859             ALOGE("failed to flush %s routes in table %s after %d attempts",
    860                   IP_VERSIONS[i], tableString, attempts);
    861             ret = -EREMOTEIO;
    862         }
    863     }
    864 
    865     // If we failed to flush routes, the caller may elect to keep this interface around, so keep
    866     // track of its name.
    867     if (!ret) {
    868         interfaceToTable.erase(interface);
    869     }
    870 
    871     return ret;
    872 }
    873 
    874 }  // namespace
    875 
    876 int RouteController::Init(unsigned localNetId) {
    877     if (int ret = flushRules()) {
    878         return ret;
    879     }
    880     if (int ret = addLegacyRouteRules()) {
    881         return ret;
    882     }
    883     if (int ret = addLocalNetworkRules(localNetId)) {
    884         return ret;
    885     }
    886     if (int ret = addDirectlyConnectedRule()) {
    887         return ret;
    888     }
    889     if (int ret = addUnreachableRule()) {
    890         return ret;
    891     }
    892     updateTableNamesFile();
    893     return 0;
    894 }
    895 
    896 int RouteController::addInterfaceToLocalNetwork(unsigned netId, const char* interface) {
    897     return modifyLocalNetwork(netId, interface, ACTION_ADD);
    898 }
    899 
    900 int RouteController::removeInterfaceFromLocalNetwork(unsigned netId, const char* interface) {
    901     return modifyLocalNetwork(netId, interface, ACTION_DEL);
    902 }
    903 
    904 int RouteController::addInterfaceToPhysicalNetwork(unsigned netId, const char* interface,
    905                                                    Permission permission) {
    906     if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_ADD)) {
    907         return ret;
    908     }
    909     updateTableNamesFile();
    910     return 0;
    911 }
    912 
    913 int RouteController::removeInterfaceFromPhysicalNetwork(unsigned netId, const char* interface,
    914                                                         Permission permission) {
    915     if (int ret = modifyPhysicalNetwork(netId, interface, permission, ACTION_DEL)) {
    916         return ret;
    917     }
    918     if (int ret = flushRoutes(interface)) {
    919         return ret;
    920     }
    921     updateTableNamesFile();
    922     return 0;
    923 }
    924 
    925 int RouteController::addInterfaceToVirtualNetwork(unsigned netId, const char* interface,
    926                                                   bool secure, const UidRanges& uidRanges) {
    927     if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
    928                                        MODIFY_NON_UID_BASED_RULES)) {
    929         return ret;
    930     }
    931     updateTableNamesFile();
    932     return 0;
    933 }
    934 
    935 int RouteController::removeInterfaceFromVirtualNetwork(unsigned netId, const char* interface,
    936                                                        bool secure, const UidRanges& uidRanges) {
    937     if (int ret = modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
    938                                        MODIFY_NON_UID_BASED_RULES)) {
    939         return ret;
    940     }
    941     if (int ret = flushRoutes(interface)) {
    942         return ret;
    943     }
    944     updateTableNamesFile();
    945     return 0;
    946 }
    947 
    948 int RouteController::modifyPhysicalNetworkPermission(unsigned netId, const char* interface,
    949                                                      Permission oldPermission,
    950                                                      Permission newPermission) {
    951     // Add the new rules before deleting the old ones, to avoid race conditions.
    952     if (int ret = modifyPhysicalNetwork(netId, interface, newPermission, ACTION_ADD)) {
    953         return ret;
    954     }
    955     return modifyPhysicalNetwork(netId, interface, oldPermission, ACTION_DEL);
    956 }
    957 
    958 int RouteController::addUsersToVirtualNetwork(unsigned netId, const char* interface, bool secure,
    959                                               const UidRanges& uidRanges) {
    960     return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_ADD,
    961                                 !MODIFY_NON_UID_BASED_RULES);
    962 }
    963 
    964 int RouteController::removeUsersFromVirtualNetwork(unsigned netId, const char* interface,
    965                                                    bool secure, const UidRanges& uidRanges) {
    966     return modifyVirtualNetwork(netId, interface, uidRanges, secure, ACTION_DEL,
    967                                 !MODIFY_NON_UID_BASED_RULES);
    968 }
    969 
    970 int RouteController::addInterfaceToDefaultNetwork(const char* interface, Permission permission) {
    971     return modifyDefaultNetwork(RTM_NEWRULE, interface, permission);
    972 }
    973 
    974 int RouteController::removeInterfaceFromDefaultNetwork(const char* interface,
    975                                                        Permission permission) {
    976     return modifyDefaultNetwork(RTM_DELRULE, interface, permission);
    977 }
    978 
    979 int RouteController::addRoute(const char* interface, const char* destination, const char* nexthop,
    980                               TableType tableType) {
    981     return modifyRoute(RTM_NEWROUTE, interface, destination, nexthop, tableType);
    982 }
    983 
    984 int RouteController::removeRoute(const char* interface, const char* destination,
    985                                  const char* nexthop, TableType tableType) {
    986     return modifyRoute(RTM_DELROUTE, interface, destination, nexthop, tableType);
    987 }
    988 
    989 int RouteController::enableTethering(const char* inputInterface, const char* outputInterface) {
    990     return modifyTetheredNetwork(RTM_NEWRULE, inputInterface, outputInterface);
    991 }
    992 
    993 int RouteController::disableTethering(const char* inputInterface, const char* outputInterface) {
    994     return modifyTetheredNetwork(RTM_DELRULE, inputInterface, outputInterface);
    995 }
    996 
    997 int RouteController::addVirtualNetworkFallthrough(unsigned vpnNetId, const char* physicalInterface,
    998                                                   Permission permission) {
    999     return modifyVpnFallthroughRule(RTM_NEWRULE, vpnNetId, physicalInterface, permission);
   1000 }
   1001 
   1002 int RouteController::removeVirtualNetworkFallthrough(unsigned vpnNetId,
   1003                                                      const char* physicalInterface,
   1004                                                      Permission permission) {
   1005     return modifyVpnFallthroughRule(RTM_DELRULE, vpnNetId, physicalInterface, permission);
   1006 }
   1007