1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "IptablesRestoreController.h" 18 19 #include <poll.h> 20 #include <signal.h> 21 #include <sys/wait.h> 22 #include <unistd.h> 23 24 #define LOG_TAG "IptablesRestoreController" 25 #include <android-base/logging.h> 26 #include <android-base/file.h> 27 #include <netdutils/Syscalls.h> 28 29 #include "Controllers.h" 30 31 using android::netdutils::StatusOr; 32 using android::netdutils::sSyscalls; 33 34 constexpr char IPTABLES_RESTORE_PATH[] = "/system/bin/iptables-restore"; 35 constexpr char IP6TABLES_RESTORE_PATH[] = "/system/bin/ip6tables-restore"; 36 37 constexpr char PING[] = "#PING\n"; 38 39 constexpr size_t PING_SIZE = sizeof(PING) - 1; 40 41 // Not compile-time constants because they are changed by the unit tests. 42 int IptablesRestoreController::MAX_RETRIES = 50; 43 int IptablesRestoreController::POLL_TIMEOUT_MS = 100; 44 45 class IptablesProcess { 46 public: 47 IptablesProcess(pid_t pid, int stdIn, int stdOut, int stdErr) : 48 pid(pid), 49 stdIn(stdIn), 50 processTerminated(false) { 51 52 pollFds[STDOUT_IDX] = { .fd = stdOut, .events = POLLIN }; 53 pollFds[STDERR_IDX] = { .fd = stdErr, .events = POLLIN }; 54 } 55 56 ~IptablesProcess() { 57 close(stdIn); 58 close(pollFds[STDOUT_IDX].fd); 59 close(pollFds[STDERR_IDX].fd); 60 } 61 62 bool outputReady() { 63 struct pollfd pollfd = { .fd = stdIn, .events = POLLOUT }; 64 int ret = poll(&pollfd, 1, 0); 65 if (ret == -1) { 66 ALOGE("outputReady poll failed: %s", strerror(errno)); 67 return false; 68 } 69 return (ret == 1) && !(pollfd.revents & POLLERR); 70 } 71 72 void stop() { 73 if (processTerminated) return; 74 75 // This can be called by drainAndWaitForAck (after a POLLHUP) or by sendCommand (if the 76 // process was killed by something else on the system). In both cases, it's safe to send the 77 // PID a SIGTERM, because the PID continues to exist until its parent (i.e., us) calls 78 // waitpid on it, so there's no risk that the PID is reused. 79 int err = kill(pid, SIGTERM); 80 if (err) { 81 err = errno; 82 } 83 84 if (err == ESRCH) { 85 // This means that someone else inside netd but outside this class called waitpid(), 86 // which is a programming error. There's no point in calling waitpid() here since we 87 // know that the process is gone. 88 ALOGE("iptables child process %d unexpectedly disappeared", pid); 89 processTerminated = true; 90 return; 91 } 92 93 if (err) { 94 ALOGE("Error killing iptables child process %d: %s", pid, strerror(err)); 95 } 96 97 int status; 98 if (waitpid(pid, &status, 0) == -1) { 99 ALOGE("Error waiting for iptables child process %d: %s", pid, strerror(errno)); 100 } else { 101 ALOGW("iptables-restore process %d terminated status=%d", pid, status); 102 } 103 104 processTerminated = true; 105 } 106 107 const pid_t pid; 108 const int stdIn; 109 110 struct pollfd pollFds[2]; 111 std::string errBuf; 112 113 std::atomic_bool processTerminated; 114 115 static constexpr size_t STDOUT_IDX = 0; 116 static constexpr size_t STDERR_IDX = 1; 117 }; 118 119 IptablesRestoreController::IptablesRestoreController() { 120 Init(); 121 } 122 123 IptablesRestoreController::~IptablesRestoreController() { 124 } 125 126 void IptablesRestoreController::Init() { 127 // We cannot fork these in parallel or a child process could inherit the pipe fds intended for 128 // use by the other child process. see https://android-review.googlesource.com/469559 for what 129 // breaks. This does not cause a latency hit, because the parent only has to wait for 130 // forkAndExec, which is sub-millisecond, and the child processes then call exec() in parallel. 131 mIpRestore.reset(forkAndExec(IPTABLES_PROCESS)); 132 mIp6Restore.reset(forkAndExec(IP6TABLES_PROCESS)); 133 } 134 135 /* static */ 136 IptablesProcess* IptablesRestoreController::forkAndExec(const IptablesProcessType type) { 137 const char* const cmd = (type == IPTABLES_PROCESS) ? 138 IPTABLES_RESTORE_PATH : IP6TABLES_RESTORE_PATH; 139 140 // Create the pipes we'll use for communication with the child 141 // process. One each for the child's in, out and err files. 142 int stdin_pipe[2]; 143 int stdout_pipe[2]; 144 int stderr_pipe[2]; 145 146 if (pipe2(stdin_pipe, O_CLOEXEC) == -1 || 147 pipe2(stdout_pipe, O_NONBLOCK | O_CLOEXEC) == -1 || 148 pipe2(stderr_pipe, O_NONBLOCK | O_CLOEXEC) == -1) { 149 150 ALOGE("pipe2() failed: %s", strerror(errno)); 151 return nullptr; 152 } 153 154 const auto& sys = sSyscalls.get(); 155 StatusOr<pid_t> child_pid = sys.fork(); 156 if (!isOk(child_pid)) { 157 ALOGE("fork() failed: %s", strerror(child_pid.status().code())); 158 return nullptr; 159 } 160 161 if (child_pid.value() == 0) { 162 // The child process. Reads from stdin, writes to stderr and stdout. 163 164 // stdin_pipe[0] : The read end of the stdin pipe. 165 // stdout_pipe[1] : The write end of the stdout pipe. 166 // stderr_pipe[1] : The write end of the stderr pipe. 167 if (dup2(stdin_pipe[0], 0) == -1 || 168 dup2(stdout_pipe[1], 1) == -1 || 169 dup2(stderr_pipe[1], 2) == -1) { 170 ALOGE("dup2() failed: %s", strerror(errno)); 171 abort(); 172 } 173 174 if (execl(cmd, 175 cmd, 176 "--noflush", // Don't flush the whole table. 177 "-w", // Wait instead of failing if the lock is held. 178 "-v", // Verbose mode, to make sure our ping is echoed 179 // back to us. 180 nullptr) == -1) { 181 ALOGE("execl(%s, ...) failed: %s", cmd, strerror(errno)); 182 abort(); 183 } 184 185 // This statement is unreachable. We abort() upon error, and execl 186 // if everything goes well. 187 return nullptr; 188 } 189 190 // The parent process. Writes to stdout and stderr and reads from stdin. 191 // stdin_pipe[0] : The read end of the stdin pipe. 192 // stdout_pipe[1] : The write end of the stdout pipe. 193 // stderr_pipe[1] : The write end of the stderr pipe. 194 if (close(stdin_pipe[0]) == -1 || 195 close(stdout_pipe[1]) == -1 || 196 close(stderr_pipe[1]) == -1) { 197 ALOGW("close() failed: %s", strerror(errno)); 198 } 199 200 return new IptablesProcess(child_pid.value(), stdin_pipe[1], stdout_pipe[0], stderr_pipe[0]); 201 } 202 203 // TODO: Return -errno on failure instead of -1. 204 // TODO: Maybe we should keep a rotating buffer of the last N commands 205 // so that they can be dumped on dumpsys. 206 int IptablesRestoreController::sendCommand(const IptablesProcessType type, 207 const std::string& command, 208 std::string *output) { 209 std::unique_ptr<IptablesProcess> *process = 210 (type == IPTABLES_PROCESS) ? &mIpRestore : &mIp6Restore; 211 212 213 // We might need to fork a new process if we haven't forked one yet, or 214 // if the forked process terminated. 215 // 216 // NOTE: For a given command, this is the last point at which we try to 217 // recover from a child death. If the child dies at some later point during 218 // the execution of this method, we will receive an EPIPE and return an 219 // error. The command will then need to be retried at a higher level. 220 IptablesProcess *existingProcess = process->get(); 221 if (existingProcess != nullptr && !existingProcess->outputReady()) { 222 existingProcess->stop(); 223 existingProcess = nullptr; 224 } 225 226 if (existingProcess == nullptr) { 227 // Fork a new iptables[6]-restore process. 228 IptablesProcess *newProcess = IptablesRestoreController::forkAndExec(type); 229 if (newProcess == nullptr) { 230 LOG(ERROR) << "Unable to fork ip[6]tables-restore, type: " << type; 231 return -1; 232 } 233 234 process->reset(newProcess); 235 } 236 237 if (!android::base::WriteFully((*process)->stdIn, command.data(), command.length())) { 238 ALOGE("Unable to send command: %s", strerror(errno)); 239 return -1; 240 } 241 242 if (!android::base::WriteFully((*process)->stdIn, PING, PING_SIZE)) { 243 ALOGE("Unable to send ping command: %s", strerror(errno)); 244 return -1; 245 } 246 247 if (!drainAndWaitForAck(*process, command, output)) { 248 // drainAndWaitForAck has already logged an error. 249 return -1; 250 } 251 252 return 0; 253 } 254 255 void IptablesRestoreController::maybeLogStderr(const std::unique_ptr<IptablesProcess> &process, 256 const std::string& command) { 257 if (process->errBuf.empty()) { 258 return; 259 } 260 261 ALOGE("iptables error:\n" 262 "------- COMMAND -------\n" 263 "%s\n" 264 "------- ERROR -------\n" 265 "%s" 266 "----------------------\n", 267 command.c_str(), process->errBuf.c_str()); 268 process->errBuf.clear(); 269 } 270 271 /* static */ 272 bool IptablesRestoreController::drainAndWaitForAck(const std::unique_ptr<IptablesProcess> &process, 273 const std::string& command, 274 std::string *output) { 275 bool receivedAck = false; 276 int timeout = 0; 277 while (!receivedAck && (timeout++ < MAX_RETRIES)) { 278 int numEvents = TEMP_FAILURE_RETRY( 279 poll(process->pollFds, ARRAY_SIZE(process->pollFds), POLL_TIMEOUT_MS)); 280 if (numEvents == -1) { 281 ALOGE("Poll failed: %s", strerror(errno)); 282 return false; 283 } 284 285 // We've timed out, which means something has gone wrong - we know that stdout should have 286 // become available to read with the ACK message, or that stderr should have been available 287 // to read with an error message. 288 if (numEvents == 0) { 289 continue; 290 } 291 292 char buffer[PIPE_BUF]; 293 for (size_t i = 0; i < ARRAY_SIZE(process->pollFds); ++i) { 294 const struct pollfd &pollfd = process->pollFds[i]; 295 if (pollfd.revents & POLLIN) { 296 ssize_t size; 297 do { 298 size = TEMP_FAILURE_RETRY(read(pollfd.fd, buffer, sizeof(buffer))); 299 300 if (size == -1) { 301 if (errno != EAGAIN) { 302 ALOGE("Unable to read from descriptor: %s", strerror(errno)); 303 } 304 break; 305 } 306 307 if (i == IptablesProcess::STDOUT_IDX) { 308 // i == STDOUT_IDX: accumulate stdout into *output, and look 309 // for the ping response. 310 output->append(buffer, size); 311 size_t pos = output->find(PING); 312 if (pos != std::string::npos) { 313 if (output->size() > pos + PING_SIZE) { 314 size_t extra = output->size() - (pos + PING_SIZE); 315 ALOGW("%zd extra characters after iptables response: '%s...'", 316 extra, output->substr(pos + PING_SIZE, 128).c_str()); 317 } 318 output->resize(pos); 319 receivedAck = true; 320 } 321 } else { 322 // i == STDERR_IDX: accumulate stderr into errBuf. 323 process->errBuf.append(buffer, size); 324 } 325 } while (size > 0); 326 } 327 if (pollfd.revents & POLLHUP) { 328 // The pipe was closed. This likely means the subprocess is exiting, since 329 // iptables-restore only closes stdin on error. 330 process->stop(); 331 break; 332 } 333 } 334 } 335 336 if (!receivedAck && !process->processTerminated) { 337 ALOGE("Timed out waiting for response from iptables process %d", process->pid); 338 // Kill the process so that if it eventually recovers, we don't misinterpret the ping 339 // response (or any output) of the command we just sent as coming from future commands. 340 process->stop(); 341 } 342 343 maybeLogStderr(process, command); 344 345 return receivedAck; 346 } 347 348 int IptablesRestoreController::execute(const IptablesTarget target, const std::string& command, 349 std::string *output) { 350 std::lock_guard<std::mutex> lock(mLock); 351 352 std::string buffer; 353 if (output == nullptr) { 354 output = &buffer; 355 } else { 356 output->clear(); 357 } 358 359 int res = 0; 360 if (target == V4 || target == V4V6) { 361 res |= sendCommand(IPTABLES_PROCESS, command, output); 362 } 363 if (target == V6 || target == V4V6) { 364 res |= sendCommand(IP6TABLES_PROCESS, command, output); 365 } 366 return res; 367 } 368 369 int IptablesRestoreController::getIpRestorePid(const IptablesProcessType type) { 370 return type == IPTABLES_PROCESS ? mIpRestore->pid : mIp6Restore->pid; 371 } 372