Home | History | Annotate | Download | only in server
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "IptablesRestoreController.h"
     18 
     19 #include <poll.h>
     20 #include <signal.h>
     21 #include <sys/wait.h>
     22 #include <unistd.h>
     23 
     24 #define LOG_TAG "IptablesRestoreController"
     25 #include <android-base/logging.h>
     26 #include <android-base/file.h>
     27 #include <netdutils/Syscalls.h>
     28 
     29 #include "Controllers.h"
     30 
     31 using android::netdutils::StatusOr;
     32 using android::netdutils::sSyscalls;
     33 
// Absolute paths of the restore binaries exec'd by forkAndExec().
constexpr char IPTABLES_RESTORE_PATH[] = "/system/bin/iptables-restore";
constexpr char IP6TABLES_RESTORE_PATH[] = "/system/bin/ip6tables-restore";

// Marker line that sendCommand() writes to the child's stdin after every command.
// drainAndWaitForAck() treats its appearance in the child's stdout as the acknowledgement.
constexpr char PING[] = "#PING\n";

// Length of PING without its terminating NUL.
constexpr size_t PING_SIZE = sizeof(PING) - 1;

// Not compile-time constants because they are changed by the unit tests.
// MAX_RETRIES bounds the number of poll() calls made by drainAndWaitForAck();
// POLL_TIMEOUT_MS is the timeout handed to each of those poll() calls.
int IptablesRestoreController::MAX_RETRIES = 50;
int IptablesRestoreController::POLL_TIMEOUT_MS = 100;
     44 
     45 class IptablesProcess {
     46 public:
     47     IptablesProcess(pid_t pid, int stdIn, int stdOut, int stdErr) :
     48         pid(pid),
     49         stdIn(stdIn),
     50         processTerminated(false) {
     51 
     52         pollFds[STDOUT_IDX] = { .fd = stdOut, .events = POLLIN };
     53         pollFds[STDERR_IDX] = { .fd = stdErr, .events = POLLIN };
     54     }
     55 
     56     ~IptablesProcess() {
     57         close(stdIn);
     58         close(pollFds[STDOUT_IDX].fd);
     59         close(pollFds[STDERR_IDX].fd);
     60     }
     61 
     62     bool outputReady() {
     63         struct pollfd pollfd = { .fd = stdIn, .events = POLLOUT };
     64         int ret = poll(&pollfd, 1, 0);
     65         if (ret == -1) {
     66             ALOGE("outputReady poll failed: %s", strerror(errno));
     67             return false;
     68         }
     69         return (ret == 1) && !(pollfd.revents & POLLERR);
     70     }
     71 
     72     void stop() {
     73         if (processTerminated) return;
     74 
     75         // This can be called by drainAndWaitForAck (after a POLLHUP) or by sendCommand (if the
     76         // process was killed by something else on the system). In both cases, it's safe to send the
     77         // PID a SIGTERM, because the PID continues to exist until its parent (i.e., us) calls
     78         // waitpid on it, so there's no risk that the PID is reused.
     79         int err = kill(pid, SIGTERM);
     80         if (err) {
     81             err = errno;
     82         }
     83 
     84         if (err == ESRCH) {
     85             // This means that someone else inside netd but outside this class called waitpid(),
     86             // which is a programming error. There's no point in calling waitpid() here since we
     87             // know that the process is gone.
     88             ALOGE("iptables child process %d unexpectedly disappeared", pid);
     89             processTerminated = true;
     90             return;
     91         }
     92 
     93         if (err) {
     94             ALOGE("Error killing iptables child process %d: %s", pid, strerror(err));
     95         }
     96 
     97         int status;
     98         if (waitpid(pid, &status, 0) == -1) {
     99             ALOGE("Error waiting for iptables child process %d: %s", pid, strerror(errno));
    100         } else {
    101             ALOGW("iptables-restore process %d terminated status=%d", pid, status);
    102         }
    103 
    104         processTerminated = true;
    105     }
    106 
    107     const pid_t pid;
    108     const int stdIn;
    109 
    110     struct pollfd pollFds[2];
    111     std::string errBuf;
    112 
    113     std::atomic_bool processTerminated;
    114 
    115     static constexpr size_t STDOUT_IDX = 0;
    116     static constexpr size_t STDERR_IDX = 1;
    117 };
    118 
    119 IptablesRestoreController::IptablesRestoreController() {
    120     Init();
    121 }
    122 
    123 IptablesRestoreController::~IptablesRestoreController() {
    124 }
    125 
    126 void IptablesRestoreController::Init() {
    127     // We cannot fork these in parallel or a child process could inherit the pipe fds intended for
    128     // use by the other child process. see https://android-review.googlesource.com/469559 for what
    129     // breaks. This does not cause a latency hit, because the parent only has to wait for
    130     // forkAndExec, which is sub-millisecond, and the child processes then call exec() in parallel.
    131     mIpRestore.reset(forkAndExec(IPTABLES_PROCESS));
    132     mIp6Restore.reset(forkAndExec(IP6TABLES_PROCESS));
    133 }
    134 
    135 /* static */
    136 IptablesProcess* IptablesRestoreController::forkAndExec(const IptablesProcessType type) {
    137     const char* const cmd = (type == IPTABLES_PROCESS) ?
    138         IPTABLES_RESTORE_PATH : IP6TABLES_RESTORE_PATH;
    139 
    140     // Create the pipes we'll use for communication with the child
    141     // process. One each for the child's in, out and err files.
    142     int stdin_pipe[2];
    143     int stdout_pipe[2];
    144     int stderr_pipe[2];
    145 
    146     if (pipe2(stdin_pipe,  O_CLOEXEC) == -1 ||
    147         pipe2(stdout_pipe, O_NONBLOCK | O_CLOEXEC) == -1 ||
    148         pipe2(stderr_pipe, O_NONBLOCK | O_CLOEXEC) == -1) {
    149 
    150         ALOGE("pipe2() failed: %s", strerror(errno));
    151         return nullptr;
    152     }
    153 
    154     const auto& sys = sSyscalls.get();
    155     StatusOr<pid_t> child_pid = sys.fork();
    156     if (!isOk(child_pid)) {
    157         ALOGE("fork() failed: %s", strerror(child_pid.status().code()));
    158         return nullptr;
    159     }
    160 
    161     if (child_pid.value() == 0) {
    162         // The child process. Reads from stdin, writes to stderr and stdout.
    163 
    164         // stdin_pipe[0] : The read end of the stdin pipe.
    165         // stdout_pipe[1] : The write end of the stdout pipe.
    166         // stderr_pipe[1] : The write end of the stderr pipe.
    167         if (dup2(stdin_pipe[0], 0) == -1 ||
    168             dup2(stdout_pipe[1], 1) == -1 ||
    169             dup2(stderr_pipe[1], 2) == -1) {
    170             ALOGE("dup2() failed: %s", strerror(errno));
    171             abort();
    172         }
    173 
    174         if (execl(cmd,
    175                   cmd,
    176                   "--noflush",  // Don't flush the whole table.
    177                   "-w",         // Wait instead of failing if the lock is held.
    178                   "-v",         // Verbose mode, to make sure our ping is echoed
    179                                 // back to us.
    180                   nullptr) == -1) {
    181             ALOGE("execl(%s, ...) failed: %s", cmd, strerror(errno));
    182             abort();
    183         }
    184 
    185         // This statement is unreachable. We abort() upon error, and execl
    186         // if everything goes well.
    187         return nullptr;
    188     }
    189 
    190     // The parent process. Writes to stdout and stderr and reads from stdin.
    191     // stdin_pipe[0] : The read end of the stdin pipe.
    192     // stdout_pipe[1] : The write end of the stdout pipe.
    193     // stderr_pipe[1] : The write end of the stderr pipe.
    194     if (close(stdin_pipe[0]) == -1 ||
    195         close(stdout_pipe[1]) == -1 ||
    196         close(stderr_pipe[1]) == -1) {
    197         ALOGW("close() failed: %s", strerror(errno));
    198     }
    199 
    200     return new IptablesProcess(child_pid.value(), stdin_pipe[1], stdout_pipe[0], stderr_pipe[0]);
    201 }
    202 
    203 // TODO: Return -errno on failure instead of -1.
    204 // TODO: Maybe we should keep a rotating buffer of the last N commands
    205 // so that they can be dumped on dumpsys.
    206 int IptablesRestoreController::sendCommand(const IptablesProcessType type,
    207                                            const std::string& command,
    208                                            std::string *output) {
    209    std::unique_ptr<IptablesProcess> *process =
    210            (type == IPTABLES_PROCESS) ? &mIpRestore : &mIp6Restore;
    211 
    212 
    213     // We might need to fork a new process if we haven't forked one yet, or
    214     // if the forked process terminated.
    215     //
    216     // NOTE: For a given command, this is the last point at which we try to
    217     // recover from a child death. If the child dies at some later point during
    218     // the execution of this method, we will receive an EPIPE and return an
    219     // error. The command will then need to be retried at a higher level.
    220     IptablesProcess *existingProcess = process->get();
    221     if (existingProcess != nullptr && !existingProcess->outputReady()) {
    222         existingProcess->stop();
    223         existingProcess = nullptr;
    224     }
    225 
    226     if (existingProcess == nullptr) {
    227         // Fork a new iptables[6]-restore process.
    228         IptablesProcess *newProcess = IptablesRestoreController::forkAndExec(type);
    229         if (newProcess == nullptr) {
    230             LOG(ERROR) << "Unable to fork ip[6]tables-restore, type: " << type;
    231             return -1;
    232         }
    233 
    234         process->reset(newProcess);
    235     }
    236 
    237     if (!android::base::WriteFully((*process)->stdIn, command.data(), command.length())) {
    238         ALOGE("Unable to send command: %s", strerror(errno));
    239         return -1;
    240     }
    241 
    242     if (!android::base::WriteFully((*process)->stdIn, PING, PING_SIZE)) {
    243         ALOGE("Unable to send ping command: %s", strerror(errno));
    244         return -1;
    245     }
    246 
    247     if (!drainAndWaitForAck(*process, command, output)) {
    248         // drainAndWaitForAck has already logged an error.
    249         return -1;
    250     }
    251 
    252     return 0;
    253 }
    254 
    255 void IptablesRestoreController::maybeLogStderr(const std::unique_ptr<IptablesProcess> &process,
    256                                                const std::string& command) {
    257     if (process->errBuf.empty()) {
    258         return;
    259     }
    260 
    261     ALOGE("iptables error:\n"
    262           "------- COMMAND -------\n"
    263           "%s\n"
    264           "-------  ERROR -------\n"
    265           "%s"
    266           "----------------------\n",
    267           command.c_str(), process->errBuf.c_str());
    268     process->errBuf.clear();
    269 }
    270 
    271 /* static */
    272 bool IptablesRestoreController::drainAndWaitForAck(const std::unique_ptr<IptablesProcess> &process,
    273                                                    const std::string& command,
    274                                                    std::string *output) {
    275     bool receivedAck = false;
    276     int timeout = 0;
    277     while (!receivedAck && (timeout++ < MAX_RETRIES)) {
    278         int numEvents = TEMP_FAILURE_RETRY(
    279             poll(process->pollFds, ARRAY_SIZE(process->pollFds), POLL_TIMEOUT_MS));
    280         if (numEvents == -1) {
    281             ALOGE("Poll failed: %s", strerror(errno));
    282             return false;
    283         }
    284 
    285         // We've timed out, which means something has gone wrong - we know that stdout should have
    286         // become available to read with the ACK message, or that stderr should have been available
    287         // to read with an error message.
    288         if (numEvents == 0) {
    289             continue;
    290         }
    291 
    292         char buffer[PIPE_BUF];
    293         for (size_t i = 0; i < ARRAY_SIZE(process->pollFds); ++i) {
    294             const struct pollfd &pollfd = process->pollFds[i];
    295             if (pollfd.revents & POLLIN) {
    296                 ssize_t size;
    297                 do {
    298                     size = TEMP_FAILURE_RETRY(read(pollfd.fd, buffer, sizeof(buffer)));
    299 
    300                     if (size == -1) {
    301                         if (errno != EAGAIN) {
    302                             ALOGE("Unable to read from descriptor: %s", strerror(errno));
    303                         }
    304                         break;
    305                     }
    306 
    307                     if (i == IptablesProcess::STDOUT_IDX) {
    308                         // i == STDOUT_IDX: accumulate stdout into *output, and look
    309                         // for the ping response.
    310                         output->append(buffer, size);
    311                         size_t pos = output->find(PING);
    312                         if (pos != std::string::npos) {
    313                             if (output->size() > pos + PING_SIZE) {
    314                                 size_t extra = output->size() - (pos + PING_SIZE);
    315                                 ALOGW("%zd extra characters after iptables response: '%s...'",
    316                                       extra, output->substr(pos + PING_SIZE, 128).c_str());
    317                             }
    318                             output->resize(pos);
    319                             receivedAck = true;
    320                         }
    321                     } else {
    322                         // i == STDERR_IDX: accumulate stderr into errBuf.
    323                         process->errBuf.append(buffer, size);
    324                     }
    325                 } while (size > 0);
    326             }
    327             if (pollfd.revents & POLLHUP) {
    328                 // The pipe was closed. This likely means the subprocess is exiting, since
    329                 // iptables-restore only closes stdin on error.
    330                 process->stop();
    331                 break;
    332             }
    333         }
    334     }
    335 
    336     if (!receivedAck && !process->processTerminated) {
    337         ALOGE("Timed out waiting for response from iptables process %d", process->pid);
    338         // Kill the process so that if it eventually recovers, we don't misinterpret the ping
    339         // response (or any output) of the command we just sent as coming from future commands.
    340         process->stop();
    341     }
    342 
    343     maybeLogStderr(process, command);
    344 
    345     return receivedAck;
    346 }
    347 
    348 int IptablesRestoreController::execute(const IptablesTarget target, const std::string& command,
    349                                        std::string *output) {
    350     std::lock_guard<std::mutex> lock(mLock);
    351 
    352     std::string buffer;
    353     if (output == nullptr) {
    354         output = &buffer;
    355     } else {
    356         output->clear();
    357     }
    358 
    359     int res = 0;
    360     if (target == V4 || target == V4V6) {
    361         res |= sendCommand(IPTABLES_PROCESS, command, output);
    362     }
    363     if (target == V6 || target == V4V6) {
    364         res |= sendCommand(IP6TABLES_PROCESS, command, output);
    365     }
    366     return res;
    367 }
    368 
    369 int IptablesRestoreController::getIpRestorePid(const IptablesProcessType type) {
    370     return type == IPTABLES_PROCESS ? mIpRestore->pid : mIp6Restore->pid;
    371 }
    372