Home | History | Annotate | Download | only in jni
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "Zygote"
     18 
     19 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
     20 #include <sys/mount.h>
     21 #include <linux/fs.h>
     22 
     23 #include <list>
     24 #include <sstream>
     25 #include <string>
     26 
     27 #include <fcntl.h>
     28 #include <grp.h>
     29 #include <inttypes.h>
     30 #include <malloc.h>
     31 #include <mntent.h>
     32 #include <paths.h>
     33 #include <signal.h>
     34 #include <stdlib.h>
     35 #include <sys/capability.h>
     36 #include <sys/cdefs.h>
     37 #include <sys/personality.h>
     38 #include <sys/prctl.h>
     39 #include <sys/resource.h>
     40 #include <sys/stat.h>
     41 #include <sys/time.h>
     42 #include <sys/types.h>
     43 #include <sys/utsname.h>
     44 #include <sys/wait.h>
     45 #include <unistd.h>
     46 
     47 #include "android-base/logging.h"
     48 #include <android-base/file.h>
     49 #include <android-base/stringprintf.h>
     50 #include <cutils/fs.h>
     51 #include <cutils/multiuser.h>
     52 #include <cutils/sched_policy.h>
     53 #include <private/android_filesystem_config.h>
     54 #include <utils/String8.h>
     55 #include <selinux/android.h>
     56 #include <seccomp_policy.h>
     57 #include <processgroup/processgroup.h>
     58 
     59 #include "core_jni_helpers.h"
     60 #include <nativehelper/JNIHelp.h>
     61 #include <nativehelper/ScopedLocalRef.h>
     62 #include <nativehelper/ScopedPrimitiveArray.h>
     63 #include <nativehelper/ScopedUtfChars.h>
     64 #include "fd_utils.h"
     65 
     66 #include "nativebridge/native_bridge.h"
     67 
     68 namespace {
     69 
     70 using android::String8;
     71 using android::base::StringPrintf;
     72 using android::base::WriteStringToFile;
     73 
// Builds a std::string of the form "<file>:<line>: <formatted message>", where
// the message is produced from the printf-style arguments via StringPrintf.
#define CREATE_ERROR(...) StringPrintf("%s:%d: ", __FILE__, __LINE__). \
                              append(StringPrintf(__VA_ARGS__))

// Pid that SigChldHandler compares reaped children against; if it matches,
// the zygote kills itself. Starts out as 0.
// NOTE(review): presumably assigned when the system server is forked; that
// code is outside this view.
static pid_t gSystemServerPid = 0;

// JNI-style (slash-separated) name of the Java Zygote class.
static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
// Class and static method used by ForkAndSpecializeCommon to invoke the
// post-fork child hooks.
// NOTE(review): presumably resolved during JNI registration; confirm.
static jclass gZygoteClass;
static jmethodID gCallPostForkChildHooks;

// Cached result of security_getenforce() (see nativeSecurityInit); consulted
// by SetUpSeccompFilter to decide whether a seccomp filter is installed.
static bool g_is_security_enforced = true;

// Must match values in com.android.internal.os.Zygote.
enum MountExternalKind {
  MOUNT_EXTERNAL_NONE = 0,
  MOUNT_EXTERNAL_DEFAULT = 1,
  MOUNT_EXTERNAL_READ = 2,
  MOUNT_EXTERNAL_WRITE = 3,
};
     92 
     93 static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
     94   std::ostringstream oss;
     95   oss << __FILE__ << ":" << line << ": " << msg;
     96   env->FatalError(oss.str().c_str());
     97 }
     98 
     99 // This signal handler is for zygote mode, since the zygote must reap its children
    100 static void SigChldHandler(int /*signal_number*/) {
    101   pid_t pid;
    102   int status;
    103 
    104   // It's necessary to save and restore the errno during this function.
    105   // Since errno is stored per thread, changing it here modifies the errno
    106   // on the thread on which this signal handler executes. If a signal occurs
    107   // between a call and an errno check, it's possible to get the errno set
    108   // here.
    109   // See b/23572286 for extra information.
    110   int saved_errno = errno;
    111 
    112   while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
    113      // Log process-death status that we care about.  In general it is
    114      // not safe to call LOG(...) from a signal handler because of
    115      // possible reentrancy.  However, we know a priori that the
    116      // current implementation of LOG() is safe to call from a SIGCHLD
    117      // handler in the zygote process.  If the LOG() implementation
    118      // changes its locking strategy or its use of syscalls within the
    119      // lazy-init critical section, its use here may become unsafe.
    120     if (WIFEXITED(status)) {
    121       ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
    122     } else if (WIFSIGNALED(status)) {
    123       ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
    124       if (WCOREDUMP(status)) {
    125         ALOGI("Process %d dumped core.", pid);
    126       }
    127     }
    128 
    129     // If the just-crashed process is the system_server, bring down zygote
    130     // so that it is restarted by init and system server will be restarted
    131     // from there.
    132     if (pid == gSystemServerPid) {
    133       ALOGE("Exit zygote because system server (%d) has terminated", pid);
    134       kill(getpid(), SIGKILL);
    135     }
    136   }
    137 
    138   // Note that we shouldn't consider ECHILD an error because
    139   // the secondary zygote might have no children left to wait for.
    140   if (pid < 0 && errno != ECHILD) {
    141     ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
    142   }
    143 
    144   errno = saved_errno;
    145 }
    146 
    147 // Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
    148 // configured very late, because earlier in the runtime we may fork() and
    149 // exec() other processes, and we want to waitpid() for those rather than
    150 // have them be harvested immediately.
    151 //
    152 // Ignore SIGHUP because all processes forked by the zygote are in the same
    153 // process group as the zygote and we don't want to be notified if we become
    154 // an orphaned group and have one or more stopped processes. This is not a
    155 // theoretical concern :
    156 // - we can become an orphaned group if one of our direct descendants forks
    157 //   and is subsequently killed before its children.
    158 // - crash_dump routinely STOPs the process it's tracing.
    159 //
    160 // See issues b/71965619 and b/25567761 for further details.
    161 //
    162 // This ends up being called repeatedly before each fork(), but there's
    163 // no real harm in that.
    164 static void SetSignalHandlers() {
    165   struct sigaction sig_chld = {};
    166   sig_chld.sa_handler = SigChldHandler;
    167 
    168   if (sigaction(SIGCHLD, &sig_chld, NULL) < 0) {
    169     ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
    170   }
    171 
    172   struct sigaction sig_hup = {};
    173   sig_hup.sa_handler = SIG_IGN;
    174   if (sigaction(SIGHUP, &sig_hup, NULL) < 0) {
    175     ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
    176   }
    177 }
    178 
    179 // Sets the SIGCHLD handler back to default behavior in zygote children.
    180 static void UnsetChldSignalHandler() {
    181   struct sigaction sa;
    182   memset(&sa, 0, sizeof(sa));
    183   sa.sa_handler = SIG_DFL;
    184 
    185   if (sigaction(SIGCHLD, &sa, NULL) < 0) {
    186     ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
    187   }
    188 }
    189 
    190 // Calls POSIX setgroups() using the int[] object as an argument.
    191 // A NULL argument is tolerated.
    192 static bool SetGids(JNIEnv* env, jintArray javaGids, std::string* error_msg) {
    193   if (javaGids == NULL) {
    194     return true;
    195   }
    196 
    197   ScopedIntArrayRO gids(env, javaGids);
    198   if (gids.get() == NULL) {
    199     *error_msg = CREATE_ERROR("Getting gids int array failed");
    200     return false;
    201   }
    202   int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
    203   if (rc == -1) {
    204     *error_msg = CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size());
    205     return false;
    206   }
    207 
    208   return true;
    209 }
    210 
    211 // Sets the resource limits via setrlimit(2) for the values in the
    212 // two-dimensional array of integers that's passed in. The second dimension
    213 // contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
    214 // treated as an empty array.
    215 static bool SetRLimits(JNIEnv* env, jobjectArray javaRlimits, std::string* error_msg) {
    216   if (javaRlimits == NULL) {
    217     return true;
    218   }
    219 
    220   rlimit rlim;
    221   memset(&rlim, 0, sizeof(rlim));
    222 
    223   for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
    224     ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
    225     ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
    226     if (javaRlimit.size() != 3) {
    227       *error_msg = CREATE_ERROR("rlimits array must have a second dimension of size 3");
    228       return false;
    229     }
    230 
    231     rlim.rlim_cur = javaRlimit[1];
    232     rlim.rlim_max = javaRlimit[2];
    233 
    234     int rc = setrlimit(javaRlimit[0], &rlim);
    235     if (rc == -1) {
    236       *error_msg = CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed", javaRlimit[0], rlim.rlim_cur,
    237             rlim.rlim_max);
    238       return false;
    239     }
    240   }
    241 
    242   return true;
    243 }
    244 
    245 // The debug malloc library needs to know whether it's the zygote or a child.
    246 extern "C" int gMallocLeakZygoteChild;
    247 
    248 static void PreApplicationInit() {
    249   // The child process sets this to indicate it's not the zygote.
    250   gMallocLeakZygoteChild = 1;
    251 
    252   // Set the jemalloc decay time to 1.
    253   mallopt(M_DECAY_TIME, 1);
    254 }
    255 
    256 static void SetUpSeccompFilter(uid_t uid) {
    257   if (!g_is_security_enforced) {
    258     ALOGI("seccomp disabled by setenforce 0");
    259     return;
    260   }
    261 
    262   // Apply system or app filter based on uid.
    263   if (uid >= AID_APP_START) {
    264     set_app_seccomp_filter();
    265   } else {
    266     set_system_seccomp_filter();
    267   }
    268 }
    269 
    270 static bool EnableKeepCapabilities(std::string* error_msg) {
    271   int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
    272   if (rc == -1) {
    273     *error_msg = CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno));
    274     return false;
    275   }
    276   return true;
    277 }
    278 
    279 static bool DropCapabilitiesBoundingSet(std::string* error_msg) {
    280   for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
    281     int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
    282     if (rc == -1) {
    283       if (errno == EINVAL) {
    284         ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
    285               "your kernel is compiled with file capabilities support");
    286       } else {
    287         *error_msg = CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno));
    288         return false;
    289       }
    290     }
    291   }
    292   return true;
    293 }
    294 
    295 static bool SetInheritable(uint64_t inheritable, std::string* error_msg) {
    296   __user_cap_header_struct capheader;
    297   memset(&capheader, 0, sizeof(capheader));
    298   capheader.version = _LINUX_CAPABILITY_VERSION_3;
    299   capheader.pid = 0;
    300 
    301   __user_cap_data_struct capdata[2];
    302   if (capget(&capheader, &capdata[0]) == -1) {
    303     *error_msg = CREATE_ERROR("capget failed: %s", strerror(errno));
    304     return false;
    305   }
    306 
    307   capdata[0].inheritable = inheritable;
    308   capdata[1].inheritable = inheritable >> 32;
    309 
    310   if (capset(&capheader, &capdata[0]) == -1) {
    311     *error_msg = CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno));
    312     return false;
    313   }
    314 
    315   return true;
    316 }
    317 
    318 static bool SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable,
    319                             std::string* error_msg) {
    320   __user_cap_header_struct capheader;
    321   memset(&capheader, 0, sizeof(capheader));
    322   capheader.version = _LINUX_CAPABILITY_VERSION_3;
    323   capheader.pid = 0;
    324 
    325   __user_cap_data_struct capdata[2];
    326   memset(&capdata, 0, sizeof(capdata));
    327   capdata[0].effective = effective;
    328   capdata[1].effective = effective >> 32;
    329   capdata[0].permitted = permitted;
    330   capdata[1].permitted = permitted >> 32;
    331   capdata[0].inheritable = inheritable;
    332   capdata[1].inheritable = inheritable >> 32;
    333 
    334   if (capset(&capheader, &capdata[0]) == -1) {
    335     *error_msg = CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") "
    336                               "failed: %s", permitted, effective, inheritable, strerror(errno));
    337     return false;
    338   }
    339   return true;
    340 }
    341 
    342 static bool SetSchedulerPolicy(std::string* error_msg) {
    343   errno = -set_sched_policy(0, SP_DEFAULT);
    344   if (errno != 0) {
    345     *error_msg = CREATE_ERROR("set_sched_policy(0, SP_DEFAULT) failed: %s", strerror(errno));
    346     return false;
    347   }
    348   return true;
    349 }
    350 
    351 static int UnmountTree(const char* path) {
    352     size_t path_len = strlen(path);
    353 
    354     FILE* fp = setmntent("/proc/mounts", "r");
    355     if (fp == NULL) {
    356         ALOGE("Error opening /proc/mounts: %s", strerror(errno));
    357         return -errno;
    358     }
    359 
    360     // Some volumes can be stacked on each other, so force unmount in
    361     // reverse order to give us the best chance of success.
    362     std::list<std::string> toUnmount;
    363     mntent* mentry;
    364     while ((mentry = getmntent(fp)) != NULL) {
    365         if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
    366             toUnmount.push_front(std::string(mentry->mnt_dir));
    367         }
    368     }
    369     endmntent(fp);
    370 
    371     for (auto path : toUnmount) {
    372         if (umount2(path.c_str(), MNT_DETACH)) {
    373             ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
    374         }
    375     }
    376     return 0;
    377 }
    378 
    379 // Create a private mount namespace and bind mount appropriate emulated
    380 // storage for the given user.
    381 static bool MountEmulatedStorage(uid_t uid, jint mount_mode,
    382         bool force_mount_namespace, std::string* error_msg) {
    383     // See storage config details at http://source.android.com/tech/storage/
    384 
    385     String8 storageSource;
    386     if (mount_mode == MOUNT_EXTERNAL_DEFAULT) {
    387         storageSource = "/mnt/runtime/default";
    388     } else if (mount_mode == MOUNT_EXTERNAL_READ) {
    389         storageSource = "/mnt/runtime/read";
    390     } else if (mount_mode == MOUNT_EXTERNAL_WRITE) {
    391         storageSource = "/mnt/runtime/write";
    392     } else if (!force_mount_namespace) {
    393         // Sane default of no storage visible
    394         return true;
    395     }
    396 
    397     // Create a second private mount namespace for our process
    398     if (unshare(CLONE_NEWNS) == -1) {
    399         *error_msg = CREATE_ERROR("Failed to unshare(): %s", strerror(errno));
    400         return false;
    401     }
    402 
    403     // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
    404     if (mount_mode == MOUNT_EXTERNAL_NONE) {
    405         return true;
    406     }
    407 
    408     if (TEMP_FAILURE_RETRY(mount(storageSource.string(), "/storage",
    409             NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
    410         *error_msg = CREATE_ERROR("Failed to mount %s to /storage: %s",
    411                                   storageSource.string(),
    412                                   strerror(errno));
    413         return false;
    414     }
    415 
    416     // Mount user-specific symlink helper into place
    417     userid_t user_id = multiuser_get_user_id(uid);
    418     const String8 userSource(String8::format("/mnt/user/%d", user_id));
    419     if (fs_prepare_dir(userSource.string(), 0751, 0, 0) == -1) {
    420         *error_msg = CREATE_ERROR("fs_prepare_dir failed on %s", userSource.string());
    421         return false;
    422     }
    423     if (TEMP_FAILURE_RETRY(mount(userSource.string(), "/storage/self",
    424             NULL, MS_BIND, NULL)) == -1) {
    425         *error_msg = CREATE_ERROR("Failed to mount %s to /storage/self: %s",
    426                                   userSource.string(),
    427                                   strerror(errno));
    428         return false;
    429     }
    430 
    431     return true;
    432 }
    433 
    434 static bool NeedsNoRandomizeWorkaround() {
    435 #if !defined(__arm__)
    436     return false;
    437 #else
    438     int major;
    439     int minor;
    440     struct utsname uts;
    441     if (uname(&uts) == -1) {
    442         return false;
    443     }
    444 
    445     if (sscanf(uts.release, "%d.%d", &major, &minor) != 2) {
    446         return false;
    447     }
    448 
    449     // Kernels before 3.4.* need the workaround.
    450     return (major < 3) || ((major == 3) && (minor < 4));
    451 #endif
    452 }
    453 
    454 // Utility to close down the Zygote socket file descriptors while
    455 // the child is still running as root with Zygote's privileges.  Each
    456 // descriptor (if any) is closed via dup2(), replacing it with a valid
    457 // (open) descriptor to /dev/null.
    458 
    459 static bool DetachDescriptors(JNIEnv* env, jintArray fdsToClose, std::string* error_msg) {
    460   if (!fdsToClose) {
    461     return true;
    462   }
    463   jsize count = env->GetArrayLength(fdsToClose);
    464   ScopedIntArrayRO ar(env, fdsToClose);
    465   if (ar.get() == NULL) {
    466     *error_msg = "Bad fd array";
    467     return false;
    468   }
    469   jsize i;
    470   int devnull;
    471   for (i = 0; i < count; i++) {
    472     devnull = open("/dev/null", O_RDWR);
    473     if (devnull < 0) {
    474       *error_msg = std::string("Failed to open /dev/null: ").append(strerror(errno));
    475       return false;
    476     }
    477     ALOGV("Switching descriptor %d to /dev/null: %s", ar[i], strerror(errno));
    478     if (dup2(devnull, ar[i]) < 0) {
    479       *error_msg = StringPrintf("Failed dup2() on descriptor %d: %s", ar[i], strerror(errno));
    480       return false;
    481     }
    482     close(devnull);
    483   }
    484   return true;
    485 }
    486 
    487 void SetThreadName(const char* thread_name) {
    488   bool hasAt = false;
    489   bool hasDot = false;
    490   const char* s = thread_name;
    491   while (*s) {
    492     if (*s == '.') {
    493       hasDot = true;
    494     } else if (*s == '@') {
    495       hasAt = true;
    496     }
    497     s++;
    498   }
    499   const int len = s - thread_name;
    500   if (len < 15 || hasAt || !hasDot) {
    501     s = thread_name;
    502   } else {
    503     s = thread_name + len - 15;
    504   }
    505   // pthread_setname_np fails rather than truncating long strings.
    506   char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
    507   strlcpy(buf, s, sizeof(buf)-1);
    508   errno = pthread_setname_np(pthread_self(), buf);
    509   if (errno != 0) {
    510     ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
    511   }
    512   // Update base::logging default tag.
    513   android::base::SetDefaultTag(buf);
    514 }
    515 
// The list of open zygote file descriptors.
// Created on the first call to ForkAndSpecializeCommon (while still NULL) and
// re-validated with Restat() on later calls.
static FileDescriptorTable* gOpenFdTable = NULL;
    518 
    519 static bool FillFileDescriptorVector(JNIEnv* env,
    520                                      jintArray java_fds,
    521                                      std::vector<int>* fds,
    522                                      std::string* error_msg) {
    523   CHECK(fds != nullptr);
    524   if (java_fds != nullptr) {
    525     ScopedIntArrayRO ar(env, java_fds);
    526     if (ar.get() == nullptr) {
    527       *error_msg = "Bad fd array";
    528       return false;
    529     }
    530     fds->reserve(ar.size());
    531     for (size_t i = 0; i < ar.size(); ++i) {
    532       fds->push_back(ar[i]);
    533     }
    534   }
    535   return true;
    536 }
    537 
    538 // Utility routine to fork zygote and specialize the child process.
    539 static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
    540                                      jint runtime_flags, jobjectArray javaRlimits,
    541                                      jlong permittedCapabilities, jlong effectiveCapabilities,
    542                                      jint mount_external,
    543                                      jstring java_se_info, jstring java_se_name,
    544                                      bool is_system_server, jintArray fdsToClose,
    545                                      jintArray fdsToIgnore, bool is_child_zygote,
    546                                      jstring instructionSet, jstring dataDir) {
    547   SetSignalHandlers();
    548 
    549   sigset_t sigchld;
    550   sigemptyset(&sigchld);
    551   sigaddset(&sigchld, SIGCHLD);
    552 
    553   auto fail_fn = [env, java_se_name, is_system_server](const std::string& msg)
    554       __attribute__ ((noreturn)) {
    555     const char* se_name_c_str = nullptr;
    556     std::unique_ptr<ScopedUtfChars> se_name;
    557     if (java_se_name != nullptr) {
    558       se_name.reset(new ScopedUtfChars(env, java_se_name));
    559       se_name_c_str = se_name->c_str();
    560     }
    561     if (se_name_c_str == nullptr && is_system_server) {
    562       se_name_c_str = "system_server";
    563     }
    564     const std::string& error_msg = (se_name_c_str == nullptr)
    565         ? msg
    566         : StringPrintf("(%s) %s", se_name_c_str, msg.c_str());
    567     env->FatalError(error_msg.c_str());
    568     __builtin_unreachable();
    569   };
    570 
    571   // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
    572   // log, which would result in the logging FDs we close being reopened.
    573   // This would cause failures because the FDs are not whitelisted.
    574   //
    575   // Note that the zygote process is single threaded at this point.
    576   if (sigprocmask(SIG_BLOCK, &sigchld, nullptr) == -1) {
    577     fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
    578   }
    579 
    580   // Close any logging related FDs before we start evaluating the list of
    581   // file descriptors.
    582   __android_log_close();
    583 
    584   std::string error_msg;
    585 
    586   // If this is the first fork for this zygote, create the open FD table.
    587   // If it isn't, we just need to check whether the list of open files has
    588   // changed (and it shouldn't in the normal case).
    589   std::vector<int> fds_to_ignore;
    590   if (!FillFileDescriptorVector(env, fdsToIgnore, &fds_to_ignore, &error_msg)) {
    591     fail_fn(error_msg);
    592   }
    593   if (gOpenFdTable == NULL) {
    594     gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, &error_msg);
    595     if (gOpenFdTable == NULL) {
    596       fail_fn(error_msg);
    597     }
    598   } else if (!gOpenFdTable->Restat(fds_to_ignore, &error_msg)) {
    599     fail_fn(error_msg);
    600   }
    601 
    602   pid_t pid = fork();
    603 
    604   if (pid == 0) {
    605     PreApplicationInit();
    606 
    607     // Clean up any descriptors which must be closed immediately
    608     if (!DetachDescriptors(env, fdsToClose, &error_msg)) {
    609       fail_fn(error_msg);
    610     }
    611 
    612     // Re-open all remaining open file descriptors so that they aren't shared
    613     // with the zygote across a fork.
    614     if (!gOpenFdTable->ReopenOrDetach(&error_msg)) {
    615       fail_fn(error_msg);
    616     }
    617 
    618     if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
    619       fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
    620     }
    621 
    622     // Keep capabilities across UID change, unless we're staying root.
    623     if (uid != 0) {
    624       if (!EnableKeepCapabilities(&error_msg)) {
    625         fail_fn(error_msg);
    626       }
    627     }
    628 
    629     if (!SetInheritable(permittedCapabilities, &error_msg)) {
    630       fail_fn(error_msg);
    631     }
    632     if (!DropCapabilitiesBoundingSet(&error_msg)) {
    633       fail_fn(error_msg);
    634     }
    635 
    636     bool use_native_bridge = !is_system_server && (instructionSet != NULL)
    637         && android::NativeBridgeAvailable();
    638     if (use_native_bridge) {
    639       ScopedUtfChars isa_string(env, instructionSet);
    640       use_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
    641     }
    642     if (use_native_bridge && dataDir == NULL) {
    643       // dataDir should never be null if we need to use a native bridge.
    644       // In general, dataDir will never be null for normal applications. It can only happen in
    645       // special cases (for isolated processes which are not associated with any app). These are
    646       // launched by the framework and should not be emulated anyway.
    647       use_native_bridge = false;
    648       ALOGW("Native bridge will not be used because dataDir == NULL.");
    649     }
    650 
    651     if (!MountEmulatedStorage(uid, mount_external, use_native_bridge, &error_msg)) {
    652       ALOGW("Failed to mount emulated storage: %s (%s)", error_msg.c_str(), strerror(errno));
    653       if (errno == ENOTCONN || errno == EROFS) {
    654         // When device is actively encrypting, we get ENOTCONN here
    655         // since FUSE was mounted before the framework restarted.
    656         // When encrypted device is booting, we get EROFS since
    657         // FUSE hasn't been created yet by init.
    658         // In either case, continue without external storage.
    659       } else {
    660         fail_fn(error_msg);
    661       }
    662     }
    663 
    664     // If this zygote isn't root, it won't be able to create a process group,
    665     // since the directory is owned by root.
    666     if (!is_system_server && getuid() == 0) {
    667         int rc = createProcessGroup(uid, getpid());
    668         if (rc != 0) {
    669             if (rc == -EROFS) {
    670                 ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
    671             } else {
    672                 ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
    673             }
    674         }
    675     }
    676 
    677     std::string error_msg;
    678     if (!SetGids(env, javaGids, &error_msg)) {
    679       fail_fn(error_msg);
    680     }
    681 
    682     if (!SetRLimits(env, javaRlimits, &error_msg)) {
    683       fail_fn(error_msg);
    684     }
    685 
    686     if (use_native_bridge) {
    687       ScopedUtfChars isa_string(env, instructionSet);
    688       ScopedUtfChars data_dir(env, dataDir);
    689       android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
    690     }
    691 
    692     int rc = setresgid(gid, gid, gid);
    693     if (rc == -1) {
    694       fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno)));
    695     }
    696 
    697     // Must be called when the new process still has CAP_SYS_ADMIN, in this case, before changing
    698     // uid from 0, which clears capabilities.  The other alternative is to call
    699     // prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that breaks SELinux domain transition (see
    700     // b/71859146).  As the result, privileged syscalls used below still need to be accessible in
    701     // app process.
    702     SetUpSeccompFilter(uid);
    703 
    704     rc = setresuid(uid, uid, uid);
    705     if (rc == -1) {
    706       fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno)));
    707     }
    708 
    709     if (NeedsNoRandomizeWorkaround()) {
    710         // Work around ARM kernel ASLR lossage (http://b/5817320).
    711         int old_personality = personality(0xffffffff);
    712         int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
    713         if (new_personality == -1) {
    714             ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
    715         }
    716     }
    717 
    718     if (!SetCapabilities(permittedCapabilities, effectiveCapabilities, permittedCapabilities,
    719                          &error_msg)) {
    720       fail_fn(error_msg);
    721     }
    722 
    723     if (!SetSchedulerPolicy(&error_msg)) {
    724       fail_fn(error_msg);
    725     }
    726 
    727     const char* se_info_c_str = NULL;
    728     ScopedUtfChars* se_info = NULL;
    729     if (java_se_info != NULL) {
    730         se_info = new ScopedUtfChars(env, java_se_info);
    731         se_info_c_str = se_info->c_str();
    732         if (se_info_c_str == NULL) {
    733           fail_fn("se_info_c_str == NULL");
    734         }
    735     }
    736     const char* se_name_c_str = NULL;
    737     ScopedUtfChars* se_name = NULL;
    738     if (java_se_name != NULL) {
    739         se_name = new ScopedUtfChars(env, java_se_name);
    740         se_name_c_str = se_name->c_str();
    741         if (se_name_c_str == NULL) {
    742           fail_fn("se_name_c_str == NULL");
    743         }
    744     }
    745     rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
    746     if (rc == -1) {
    747       fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
    748             is_system_server, se_info_c_str, se_name_c_str));
    749     }
    750 
    751     // Make it easier to debug audit logs by setting the main thread's name to the
    752     // nice name rather than "app_process".
    753     if (se_name_c_str == NULL && is_system_server) {
    754       se_name_c_str = "system_server";
    755     }
    756     if (se_name_c_str != NULL) {
    757       SetThreadName(se_name_c_str);
    758     }
    759 
    760     delete se_info;
    761     delete se_name;
    762 
    763     // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
    764     UnsetChldSignalHandler();
    765 
    766     env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
    767                               is_system_server, is_child_zygote, instructionSet);
    768     if (env->ExceptionCheck()) {
    769       fail_fn("Error calling post fork hooks.");
    770     }
    771   } else if (pid > 0) {
    772     // the parent process
    773 
    774     // We blocked SIGCHLD prior to a fork, we unblock it here.
    775     if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
    776       fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
    777     }
    778   }
    779   return pid;
    780 }
    781 
    782 static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
    783     __user_cap_header_struct capheader;
    784     memset(&capheader, 0, sizeof(capheader));
    785     capheader.version = _LINUX_CAPABILITY_VERSION_3;
    786     capheader.pid = 0;
    787 
    788     __user_cap_data_struct capdata[2];
    789     if (capget(&capheader, &capdata[0]) == -1) {
    790         ALOGE("capget failed: %s", strerror(errno));
    791         RuntimeAbort(env, __LINE__, "capget failed");
    792     }
    793 
    794     return capdata[0].effective |
    795            (static_cast<uint64_t>(capdata[1].effective) << 32);
    796 }
    797 }  // anonymous namespace
    798 
    799 namespace android {
    800 
    801 static void com_android_internal_os_Zygote_nativeSecurityInit(JNIEnv*, jclass) {
    802   // security_getenforce is not allowed on app process. Initialize and cache the value before
    803   // zygote forks.
    804   g_is_security_enforced = security_getenforce();
    805 }
    806 
    807 static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
    808   PreApplicationInit();
    809 }
    810 
    811 static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
    812         JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
    813         jint runtime_flags, jobjectArray rlimits,
    814         jint mount_external, jstring se_info, jstring se_name,
    815         jintArray fdsToClose, jintArray fdsToIgnore, jboolean is_child_zygote,
    816         jstring instructionSet, jstring appDataDir) {
    817     jlong capabilities = 0;
    818 
    819     // Grant CAP_WAKE_ALARM to the Bluetooth process.
    820     // Additionally, allow bluetooth to open packet sockets so it can start the DHCP client.
    821     // Grant CAP_SYS_NICE to allow Bluetooth to set RT priority for
    822     // audio-related threads.
    823     // TODO: consider making such functionality an RPC to netd.
    824     if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
    825       capabilities |= (1LL << CAP_WAKE_ALARM);
    826       capabilities |= (1LL << CAP_NET_RAW);
    827       capabilities |= (1LL << CAP_NET_BIND_SERVICE);
    828       capabilities |= (1LL << CAP_SYS_NICE);
    829     }
    830 
    831     // Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
    832     bool gid_wakelock_found = false;
    833     if (gid == AID_WAKELOCK) {
    834       gid_wakelock_found = true;
    835     } else if (gids != NULL) {
    836       jsize gids_num = env->GetArrayLength(gids);
    837       ScopedIntArrayRO ar(env, gids);
    838       if (ar.get() == NULL) {
    839         RuntimeAbort(env, __LINE__, "Bad gids array");
    840       }
    841       for (int i = 0; i < gids_num; i++) {
    842         if (ar[i] == AID_WAKELOCK) {
    843           gid_wakelock_found = true;
    844           break;
    845         }
    846       }
    847     }
    848     if (gid_wakelock_found) {
    849       capabilities |= (1LL << CAP_BLOCK_SUSPEND);
    850     }
    851 
    852     // If forking a child zygote process, that zygote will need to be able to change
    853     // the UID and GID of processes it forks, as well as drop those capabilities.
    854     if (is_child_zygote) {
    855       capabilities |= (1LL << CAP_SETUID);
    856       capabilities |= (1LL << CAP_SETGID);
    857       capabilities |= (1LL << CAP_SETPCAP);
    858     }
    859 
    860     // Containers run without some capabilities, so drop any caps that are not
    861     // available.
    862     capabilities &= GetEffectiveCapabilityMask(env);
    863 
    864     return ForkAndSpecializeCommon(env, uid, gid, gids, runtime_flags,
    865             rlimits, capabilities, capabilities, mount_external, se_info,
    866             se_name, false, fdsToClose, fdsToIgnore, is_child_zygote == JNI_TRUE,
    867             instructionSet, appDataDir);
    868 }
    869 
    870 static jint com_android_internal_os_Zygote_nativeForkSystemServer(
    871         JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
    872         jint runtime_flags, jobjectArray rlimits, jlong permittedCapabilities,
    873         jlong effectiveCapabilities) {
    874   pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
    875                                       runtime_flags, rlimits,
    876                                       permittedCapabilities, effectiveCapabilities,
    877                                       MOUNT_EXTERNAL_DEFAULT, NULL, NULL, true, NULL,
    878                                       NULL, false, NULL, NULL);
    879   if (pid > 0) {
    880       // The zygote process checks whether the child process has died or not.
    881       ALOGI("System server process %d has been created", pid);
    882       gSystemServerPid = pid;
    883       // There is a slight window that the system server process has crashed
    884       // but it went unnoticed because we haven't published its pid yet. So
    885       // we recheck here just to make sure that all is well.
    886       int status;
    887       if (waitpid(pid, &status, WNOHANG) == pid) {
    888           ALOGE("System server process %d has died. Restarting Zygote!", pid);
    889           RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
    890       }
    891 
    892       // Assign system_server to the correct memory cgroup.
    893       // Not all devices mount /dev/memcg so check for the file first
    894       // to avoid unnecessarily printing errors and denials in the logs.
    895       if (!access("/dev/memcg/system/tasks", F_OK) &&
    896                 !WriteStringToFile(StringPrintf("%d", pid), "/dev/memcg/system/tasks")) {
    897         ALOGE("couldn't write %d to /dev/memcg/system/tasks", pid);
    898       }
    899   }
    900   return pid;
    901 }
    902 
    903 static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
    904         JNIEnv* env, jclass, jstring path) {
    905     ScopedUtfChars path_native(env, path);
    906     const char* path_cstr = path_native.c_str();
    907     if (!path_cstr) {
    908         RuntimeAbort(env, __LINE__, "path_cstr == NULL");
    909     }
    910     FileDescriptorWhitelist::Get()->Allow(path_cstr);
    911 }
    912 
    913 static void com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv* env, jclass) {
    914     // Zygote process unmount root storage space initially before every child processes are forked.
    915     // Every forked child processes (include SystemServer) only mount their own root storage space
    916     // and no need unmount storage operation in MountEmulatedStorage method.
    917     // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
    918 
    919     // See storage config details at http://source.android.com/tech/storage/
    920     // Create private mount namespace shared by all children
    921     if (unshare(CLONE_NEWNS) == -1) {
    922         RuntimeAbort(env, __LINE__, "Failed to unshare()");
    923         return;
    924     }
    925 
    926     // Mark rootfs as being a slave so that changes from default
    927     // namespace only flow into our children.
    928     if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
    929         RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
    930         return;
    931     }
    932 
    933     // Create a staging tmpfs that is shared by our children; they will
    934     // bind mount storage into their respective private namespaces, which
    935     // are isolated from each other.
    936     const char* target_base = getenv("EMULATED_STORAGE_TARGET");
    937     if (target_base != nullptr) {
    938 #define STRINGIFY_UID(x) __STRING(x)
    939         if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
    940                   "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
    941             ALOGE("Failed to mount tmpfs to %s", target_base);
    942             RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
    943             return;
    944         }
    945 #undef STRINGIFY_UID
    946     }
    947 
    948     UnmountTree("/storage");
    949 }
    950 
    951 static const JNINativeMethod gMethods[] = {
    952     { "nativeSecurityInit", "()V",
    953       (void *) com_android_internal_os_Zygote_nativeSecurityInit },
    954     { "nativeForkAndSpecialize",
    955       "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/String;)I",
    956       (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
    957     { "nativeForkSystemServer", "(II[II[[IJJ)I",
    958       (void *) com_android_internal_os_Zygote_nativeForkSystemServer },
    959     { "nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
    960       (void *) com_android_internal_os_Zygote_nativeAllowFileAcrossFork },
    961     { "nativeUnmountStorageOnInit", "()V",
    962       (void *) com_android_internal_os_Zygote_nativeUnmountStorageOnInit },
    963     { "nativePreApplicationInit", "()V",
    964       (void *) com_android_internal_os_Zygote_nativePreApplicationInit }
    965 };
    966 
    967 int register_com_android_internal_os_Zygote(JNIEnv* env) {
    968   gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
    969   gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
    970                                                    "(IZZLjava/lang/String;)V");
    971 
    972   return RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
    973 }
    974 }  // namespace android
    975