1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <errno.h> 18 #include <fcntl.h> 19 #include <signal.h> 20 #include <stdio.h> 21 #include <sys/socket.h> 22 #include <sys/types.h> 23 #include <sys/wait.h> 24 #include <unistd.h> 25 26 #include <base/stringprintf.h> 27 #include <cutils/android_reboot.h> 28 #include <cutils/list.h> 29 #include <cutils/sockets.h> 30 31 #include "init.h" 32 #include "log.h" 33 #include "util.h" 34 35 #define CRITICAL_CRASH_THRESHOLD 4 /* if we crash >4 times ... */ 36 #define CRITICAL_CRASH_WINDOW (4*60) /* ... in 4 minutes, goto recovery */ 37 38 static int signal_write_fd = -1; 39 static int signal_read_fd = -1; 40 41 static std::string DescribeStatus(int status) { 42 if (WIFEXITED(status)) { 43 return android::base::StringPrintf("exited with status %d", WEXITSTATUS(status)); 44 } else if (WIFSIGNALED(status)) { 45 return android::base::StringPrintf("killed by signal %d", WTERMSIG(status)); 46 } else if (WIFSTOPPED(status)) { 47 return android::base::StringPrintf("stopped by signal %d", WSTOPSIG(status)); 48 } else { 49 return "state changed"; 50 } 51 } 52 53 static bool wait_for_one_process() { 54 int status; 55 pid_t pid = TEMP_FAILURE_RETRY(waitpid(-1, &status, WNOHANG)); 56 if (pid == 0) { 57 return false; 58 } else if (pid == -1) { 59 ERROR("waitpid failed: %s\n", strerror(errno)); 60 return false; 61 } 62 63 service* svc = service_find_by_pid(pid); 64 65 std::string name; 66 if (svc) { 67 name = android::base::StringPrintf("Service '%s' (pid %d)", svc->name, pid); 68 } else { 69 name = android::base::StringPrintf("Untracked pid %d", pid); 70 } 71 72 NOTICE("%s %s\n", name.c_str(), DescribeStatus(status).c_str()); 73 74 if (!svc) { 75 return true; 76 } 77 78 // TODO: all the code from here down should be a member function on service. 79 80 if (!(svc->flags & SVC_ONESHOT) || (svc->flags & SVC_RESTART)) { 81 NOTICE("Service '%s' (pid %d) killing any children in process group\n", svc->name, pid); 82 kill(-pid, SIGKILL); 83 } 84 85 // Remove any sockets we may have created. 86 for (socketinfo* si = svc->sockets; si; si = si->next) { 87 char tmp[128]; 88 snprintf(tmp, sizeof(tmp), ANDROID_SOCKET_DIR"/%s", si->name); 89 unlink(tmp); 90 } 91 92 if (svc->flags & SVC_EXEC) { 93 INFO("SVC_EXEC pid %d finished...\n", svc->pid); 94 waiting_for_exec = false; 95 list_remove(&svc->slist); 96 free(svc->name); 97 free(svc); 98 return true; 99 } 100 101 svc->pid = 0; 102 svc->flags &= (~SVC_RUNNING); 103 104 // Oneshot processes go into the disabled state on exit, 105 // except when manually restarted. 106 if ((svc->flags & SVC_ONESHOT) && !(svc->flags & SVC_RESTART)) { 107 svc->flags |= SVC_DISABLED; 108 } 109 110 // Disabled and reset processes do not get restarted automatically. 111 if (svc->flags & (SVC_DISABLED | SVC_RESET)) { 112 svc->NotifyStateChange("stopped"); 113 return true; 114 } 115 116 time_t now = gettime(); 117 if ((svc->flags & SVC_CRITICAL) && !(svc->flags & SVC_RESTART)) { 118 if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) { 119 if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) { 120 ERROR("critical process '%s' exited %d times in %d minutes; " 121 "rebooting into recovery mode\n", svc->name, 122 CRITICAL_CRASH_THRESHOLD, CRITICAL_CRASH_WINDOW / 60); 123 android_reboot(ANDROID_RB_RESTART2, 0, "recovery"); 124 return true; 125 } 126 } else { 127 svc->time_crashed = now; 128 svc->nr_crashed = 1; 129 } 130 } 131 132 svc->flags &= (~SVC_RESTART); 133 svc->flags |= SVC_RESTARTING; 134 135 // Execute all onrestart commands for this service. 136 struct listnode* node; 137 list_for_each(node, &svc->onrestart.commands) { 138 command* cmd = node_to_item(node, struct command, clist); 139 cmd->func(cmd->nargs, cmd->args); 140 } 141 svc->NotifyStateChange("restarting"); 142 return true; 143 } 144 145 static void reap_any_outstanding_children() { 146 while (wait_for_one_process()) { 147 } 148 } 149 150 static void handle_signal() { 151 // Clear outstanding requests. 152 char buf[32]; 153 read(signal_read_fd, buf, sizeof(buf)); 154 155 reap_any_outstanding_children(); 156 } 157 158 static void SIGCHLD_handler(int) { 159 if (TEMP_FAILURE_RETRY(write(signal_write_fd, "1", 1)) == -1) { 160 ERROR("write(signal_write_fd) failed: %s\n", strerror(errno)); 161 } 162 } 163 164 void signal_handler_init() { 165 // Create a signalling mechanism for SIGCHLD. 166 int s[2]; 167 if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0, s) == -1) { 168 ERROR("socketpair failed: %s\n", strerror(errno)); 169 exit(1); 170 } 171 172 signal_write_fd = s[0]; 173 signal_read_fd = s[1]; 174 175 // Write to signal_write_fd if we catch SIGCHLD. 176 struct sigaction act; 177 memset(&act, 0, sizeof(act)); 178 act.sa_handler = SIGCHLD_handler; 179 act.sa_flags = SA_NOCLDSTOP; 180 sigaction(SIGCHLD, &act, 0); 181 182 reap_any_outstanding_children(); 183 184 register_epoll_handler(signal_read_fd, handle_signal); 185 } 186