Home | History | Annotate | Download | only in createns
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "createns"
     18 #include <log/log.h>
     19 
     20 #include <errno.h>
     21 #include <fcntl.h>
     22 #include <inttypes.h>
     23 #include <sched.h>
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #include <sys/mount.h>
     27 #include <sys/types.h>
     28 #include <sys/stat.h>
     29 #include <unistd.h>
     30 
     31 #include <limits>
     32 #include <string>
     33 #include <vector>
     34 
     35 static const char kNamespacePath[] = "/data/vendor/var/run/netns/";
     36 static const char kProcNsNet[] = "/proc/self/ns/net";
     37 
     38 class Fd {
     39 public:
     40     explicit Fd(int fd) : mFd(fd) { }
     41     Fd(const Fd&) = delete;
     42     ~Fd() {
     43         if (mFd != -1) {
     44             ::close(mFd);
     45             mFd = -1;
     46         }
     47     }
     48 
     49     int get() const { return mFd; }
     50     Fd& operator=(const Fd&) = delete;
     51 private:
     52     int mFd;
     53 };
     54 
     55 static void usage(const char* program) {
     56     ALOGE("%s <namespace>", program);
     57 }
     58 
     59 static bool removeFile(const char* file) {
     60     if (::unlink(file) == -1) {
     61         ALOGE("Failed to unlink file '%s': %s", file, strerror(errno));
     62         return false;
     63     }
     64     return true;
     65 }
     66 
     67 static std::string getNamespacePath(const char* name) {
     68     size_t len = strlen(name);
     69     if (len == 0) {
     70         ALOGE("Must provide a namespace argument that is not empty");
     71         return std::string();
     72     }
     73 
     74     if (std::numeric_limits<size_t>::max() - sizeof(kNamespacePath) < len) {
     75         // The argument is so big the resulting string can't fit in size_t
     76         ALOGE("Namespace argument too long");
     77         return std::string();
     78     }
     79 
     80     std::vector<char> nsPath(sizeof(kNamespacePath) + len);
     81     size_t totalSize = strlcpy(nsPath.data(), kNamespacePath, nsPath.size());
     82     if (totalSize >= nsPath.size()) {
     83         // The resulting string had to be concatenated to fit, this is a logic
     84         // error in the code above that determines the size of the data.
     85         ALOGE("Could not create namespace path");
     86         return std::string();
     87     }
     88     totalSize = strlcat(nsPath.data(), name, nsPath.size());
     89     if (totalSize >= nsPath.size()) {
     90         // The resulting string had to be concatenated to fit, this is a logic
     91         // error in the code above that determines the size of the data.
     92         ALOGE("Could not append to namespace path");
     93         return std::string();
     94     }
     95     return nsPath.data();
     96 }
     97 
     98 static bool writeNamespacePid(const char* name, pid_t pid) {
     99     std::string path = getNamespacePath(name);
    100     if (path.empty()) {
    101         return false;
    102     }
    103     path += ".pid";
    104 
    105     Fd fd(::open(path.c_str(),
    106                  O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC,
    107                  S_IRUSR | S_IRGRP | S_IROTH));
    108     if (fd.get() == -1) {
    109         ALOGE("Unable to create file '%s': %s", path.c_str(), strerror(errno));
    110         return false;
    111     }
    112 
    113     // In order to safely print a pid_t we use int64_t with a known format
    114     // specifier. Ensure that a pid_t will fit in a pid_t. According to POSIX
    115     // pid_t is signed.
    116     static_assert(sizeof(pid_t) <= sizeof(int64_t),
    117                   "pid_t is larger than int64_t");
    118     char pidString[32];
    119     int printed = snprintf(pidString,
    120                            sizeof(pidString),
    121                            "%" PRId64,
    122                            static_cast<int64_t>(pid));
    123     if (printed <= 0) {
    124         ALOGE("Unabled to created PID string for writing");
    125         removeFile(path.c_str());
    126         return false;
    127     }
    128 
    129     const char* toPrint = pidString;
    130     int remaining = printed;
    131     for (;;) {
    132         int result = ::write(fd.get(), toPrint, remaining);
    133         if (result < 0) {
    134             if (errno == EINTR) {
    135                 continue;
    136             }
    137             ALOGE("Unable to write pid to file %s: %s",
    138                   path.c_str(), strerror(errno));
    139             removeFile(path.c_str());
    140             return false;
    141         } else if (result < printed) {
    142             remaining -= result;
    143             toPrint += result;
    144         } else {
    145             break;
    146         }
    147     }
    148     return true;
    149 }
    150 
    151 static pid_t daemonize(int fd) {
    152     // This convoluted way of demonizing the process is described in
    153     // man (7) daemon.
    154 
    155     // (1) Close all files, we don't have any open files at this point
    156     // (2) Reset all signal handlers to default, they already are
    157     // (3) Reset the signal mask, we never changed it
    158     // (4) Sanitize environment block, we didn't change the environment
    159     // (5) Call fork
    160     pid_t pid = ::fork();
    161     if (pid != 0) {
    162         // In the parent, nothing more to do
    163         return pid;
    164     }
    165 
    166     // (6) Acquire a new session to detach from terminal
    167     ::setsid();
    168 
    169     // (7) Fork again to avoid the daemon being attached to a terminal again
    170     pid = ::fork();
    171     if (pid != 0) {
    172         // (8) This is the first child, needs to call exit
    173         exit(0);
    174         return pid;
    175     }
    176     // (9) Connect /dev/null to stdin, stdout, stderr
    177     ::close(STDIN_FILENO);
    178     ::close(STDOUT_FILENO);
    179     ::close(STDERR_FILENO);
    180     // Since open will always reuse the lowest available fd and we have closed
    181     // every single fd at this point we can just open them in the correct order.
    182     if (::open("/dev/null", O_RDONLY) == -1) {
    183         ALOGE("Unable to open /dev/null as stdin");
    184     }
    185     if (::open("/dev/null", O_WRONLY) == -1) {
    186         ALOGE("Unable to open /dev/null as stdout");
    187     }
    188     if (::open("/dev/null", O_WRONLY) == -1) {
    189         ALOGE("Unable to open /dev/null as stderr");
    190     }
    191     // (10) Reset umask to zero
    192     ::umask(0);
    193     // (11) Change directory to root (/)
    194     if (::chdir("/") != 0) {
    195         ALOGE("Failed to set working directory to root: %s", strerror(errno));
    196     }
    197     // (12) Write the pid of the daemon to a file, we're passing this to
    198     // the process that starts the daemon to ensure that the pid file exists
    199     // once that process exits. Atomicity is guaranteed by that write requiring
    200     // that the pid file does not exist to begin with.
    201     pid = ::getpid();
    202     if (::write(fd, &pid, sizeof(pid)) != sizeof(pid)) {
    203         ALOGE("Unable to write pid to pipe: %s", strerror(errno));
    204         ::close(fd);
    205         exit(1);
    206     }
    207     ::close(fd);
    208     // (13) Drop privileges, doing this causes problems for execns when it's
    209     // trying to open the proc/ns/net file of this process so we can't do that.
    210     // (14) Notify the starting process that the daemon is running, this is done
    211     // in step (12) above.
    212     // (15) Exit starting process happens in main where it returns.
    213     return 0;
    214 }
    215 
    216 int main(int argc, char* argv[]) {
    217     if (argc != 2) {
    218         usage(argv[0]);
    219         return 1;
    220     }
    221     int fds[2];
    222     if (::pipe2(fds, O_CLOEXEC) != 0) {
    223         ALOGE("Failed to create pipe: %s", strerror(errno));
    224         return 1;
    225     }
    226     Fd readPipe(fds[0]);
    227     Fd writePipe(fds[1]);
    228 
    229     if (::unshare(CLONE_NEWNET) != 0) {
    230         ALOGE("Failed to create network namespace '%s': %s",
    231               argv[1],
    232               strerror(errno));
    233         return 1;
    234     }
    235 
    236     std::string path = getNamespacePath(argv[1]);
    237     if (path.empty()) {
    238         return 1;
    239     }
    240     {
    241         // Open and then immediately close the fd
    242         Fd fd(::open(path.c_str(), O_CREAT | O_EXCL | O_RDONLY | O_CLOEXEC, 0));
    243         if (fd.get() == -1) {
    244             ALOGE("Failed to open file %s: %s", path.c_str(), strerror(errno));
    245             return 1;
    246         }
    247     }
    248     if (::mount(kProcNsNet, path.c_str(), nullptr, MS_BIND, nullptr) != 0) {
    249         ALOGE("Failed to bind %s to %s: %s",
    250               kProcNsNet,
    251               path.c_str(),
    252               strerror(errno));
    253         // Clean up on failure
    254         removeFile(path.c_str());
    255         return 1;
    256     }
    257 
    258     // At this point we fork. This way we keep a process in the namespace alive
    259     // without this command being blocking. This is valuable because it allows
    260     // us to write the pid to a file before we exit. That way we can guarantee
    261     // that after this command completes there is a pid to be read, there is no
    262     // asynchronous behavior going on.
    263     pid_t pid = daemonize(writePipe.get());
    264     if (pid == 0) {
    265         // In the child
    266         for (;;) {
    267             pause();
    268         }
    269     } else {
    270         // In the parent, read the pid of the daemon from the pipe and write it
    271         // to a file.
    272         pid_t child = 0;
    273         if (::read(readPipe.get(), &child, sizeof(child)) != sizeof(child)) {
    274             ALOGE("Failed to read child PID from pipe: %s", strerror(errno));
    275             return 1;
    276         }
    277         if (!writeNamespacePid(argv[1], child)) {
    278             return 1;
    279         }
    280     }
    281 
    282     return 0;
    283 }
    284 
    285