Home | History | Annotate | Download | only in makeparallel
      1 // Copyright (C) 2015 The Android Open Source Project
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // makeparallel communicates with the GNU make jobserver
     16 // (http://make.mad-scientist.net/papers/jobserver-implementation/)
// in order to claim all available jobs, and then passes the number of jobs
     18 // claimed to a subprocess with -j<jobs>.
     19 
     20 #include <errno.h>
     21 #include <fcntl.h>
     22 #include <getopt.h>
     23 #include <poll.h>
     24 #include <signal.h>
     25 #include <stdio.h>
     26 #include <stdlib.h>
     27 #include <string.h>
     28 #include <unistd.h>
     29 #include <sys/resource.h>
     30 #include <sys/time.h>
     31 #include <sys/types.h>
     32 #include <sys/wait.h>
     33 
     34 #include <string>
     35 #include <vector>
     36 
     37 #ifdef __linux__
     38 #include <error.h>
     39 #endif
     40 
     41 #ifdef __APPLE__
     42 #include <err.h>
     43 #define error(code, eval, fmt, ...) errc(eval, code, fmt, ##__VA_ARGS__)
     44 // Darwin does not interrupt syscalls by default.
     45 #define TEMP_FAILURE_RETRY(exp) (exp)
     46 #endif
     47 
     48 // Throw an error if fd is not valid.
     49 static void CheckFd(int fd) {
     50   int ret = fcntl(fd, F_GETFD);
     51   if (ret < 0) {
     52     if (errno == EBADF) {
     53       error(errno, 0, "no jobserver pipe, prefix recipe command with '+'");
     54     } else {
     55       error(errno, errno, "fnctl failed");
     56     }
     57   }
     58 }
     59 
     60 // Extract flags from MAKEFLAGS that need to be propagated to subproccess
     61 static std::vector<std::string> ReadMakeflags() {
     62   std::vector<std::string> args;
     63 
     64   const char* makeflags_env = getenv("MAKEFLAGS");
     65   if (makeflags_env == nullptr) {
     66     return args;
     67   }
     68 
     69   // The MAKEFLAGS format is pretty useless.  The first argument might be empty
     70   // (starts with a leading space), or it might be a set of one-character flags
     71   // merged together with no leading space, or it might be a variable
     72   // definition.
     73 
     74   std::string makeflags = makeflags_env;
     75 
     76   // Split makeflags into individual args on spaces.  Multiple spaces are
     77   // elided, but an initial space will result in a blank arg.
     78   size_t base = 0;
     79   size_t found;
     80   do {
     81     found = makeflags.find_first_of(" ", base);
     82     args.push_back(makeflags.substr(base, found - base));
     83     base = found + 1;
     84   } while (found != makeflags.npos);
     85 
     86   // Drop the first argument if it is empty
     87   while (args.size() > 0 && args[0].size() == 0) {
     88 	  args.erase(args.begin());
     89   }
     90 
     91   // Prepend a - to the first argument if it does not have one and is not a
     92   // variable definition
     93   if (args.size() > 0 && args[0][0] != '-') {
     94     if (args[0].find('=') == makeflags.npos) {
     95       args[0] = '-' + args[0];
     96     }
     97   }
     98 
     99   return args;
    100 }
    101 
    102 static bool ParseMakeflags(std::vector<std::string>& args,
    103     int* in_fd, int* out_fd, bool* parallel, bool* keep_going) {
    104 
    105   std::vector<char*> getopt_argv;
    106   // getopt starts reading at argv[1]
    107   getopt_argv.reserve(args.size() + 1);
    108   getopt_argv.push_back(strdup(""));
    109   for (std::string& v : args) {
    110     getopt_argv.push_back(strdup(v.c_str()));
    111   }
    112 
    113   opterr = 0;
    114   optind = 1;
    115   while (1) {
    116     const static option longopts[] = {
    117         {"jobserver-fds", required_argument, 0, 0},
    118         {0, 0, 0, 0},
    119     };
    120     int longopt_index = 0;
    121 
    122     int c = getopt_long(getopt_argv.size(), getopt_argv.data(), "kj",
    123         longopts, &longopt_index);
    124 
    125     if (c == -1) {
    126       break;
    127     }
    128 
    129     switch (c) {
    130     case 0:
    131       switch (longopt_index) {
    132       case 0:
    133       {
    134         // jobserver-fds
    135         if (sscanf(optarg, "%d,%d", in_fd, out_fd) != 2) {
    136           error(EXIT_FAILURE, 0, "incorrect format for --jobserver-fds: %s", optarg);
    137         }
    138         // TODO: propagate in_fd, out_fd
    139         break;
    140       }
    141       default:
    142         abort();
    143       }
    144       break;
    145     case 'j':
    146       *parallel = true;
    147       break;
    148     case 'k':
    149       *keep_going = true;
    150       break;
    151     case '?':
    152       // ignore unknown arguments
    153       break;
    154     default:
    155       abort();
    156     }
    157   }
    158 
    159   for (char *v : getopt_argv) {
    160     free(v);
    161   }
    162 
    163   return true;
    164 }
    165 
    166 // Read a single byte from fd, with timeout in milliseconds.  Returns true if
    167 // a byte was read, false on timeout.  Throws away the read value.
    168 // Non-reentrant, uses timer and signal handler global state, plus static
    169 // variable to communicate with signal handler.
    170 //
    171 // Uses a SIGALRM timer to fire a signal after timeout_ms that will interrupt
    172 // the read syscall if it hasn't yet completed.  If the timer fires before the
    173 // read the read could block forever, so read from a dup'd fd and close it from
    174 // the signal handler, which will cause the read to return EBADF if it occurs
    175 // after the signal.
    176 // The dup/read/close combo is very similar to the system described to avoid
    177 // a deadlock between SIGCHLD and read at
    178 // http://make.mad-scientist.net/papers/jobserver-implementation/
    179 static bool ReadByteTimeout(int fd, int timeout_ms) {
    180   // global variable to communicate with the signal handler
    181   static int dup_fd = -1;
    182 
    183   // dup the fd so the signal handler can close it without losing the real one
    184   dup_fd = dup(fd);
    185   if (dup_fd < 0) {
    186     error(errno, errno, "dup failed");
    187   }
    188 
    189   // set up a signal handler that closes dup_fd on SIGALRM
    190   struct sigaction action = {};
    191   action.sa_flags = SA_SIGINFO,
    192   action.sa_sigaction = [](int, siginfo_t*, void*) {
    193     close(dup_fd);
    194   };
    195   struct sigaction oldaction = {};
    196   int ret = sigaction(SIGALRM, &action, &oldaction);
    197   if (ret < 0) {
    198     error(errno, errno, "sigaction failed");
    199   }
    200 
    201   // queue a SIGALRM after timeout_ms
    202   const struct itimerval timeout = {{}, {0, timeout_ms * 1000}};
    203   ret = setitimer(ITIMER_REAL, &timeout, NULL);
    204   if (ret < 0) {
    205     error(errno, errno, "setitimer failed");
    206   }
    207 
    208   // start the blocking read
    209   char buf;
    210   int read_ret = read(dup_fd, &buf, 1);
    211   int read_errno = errno;
    212 
    213   // cancel the alarm in case it hasn't fired yet
    214   const struct itimerval cancel = {};
    215   ret = setitimer(ITIMER_REAL, &cancel, NULL);
    216   if (ret < 0) {
    217     error(errno, errno, "reset setitimer failed");
    218   }
    219 
    220   // remove the signal handler
    221   ret = sigaction(SIGALRM, &oldaction, NULL);
    222   if (ret < 0) {
    223     error(errno, errno, "reset sigaction failed");
    224   }
    225 
    226   // clean up the dup'd fd in case the signal never fired
    227   close(dup_fd);
    228   dup_fd = -1;
    229 
    230   if (read_ret == 0) {
    231     error(EXIT_FAILURE, 0, "EOF on jobserver pipe");
    232   } else if (read_ret > 0) {
    233     return true;
    234   } else if (read_errno == EINTR || read_errno == EBADF) {
    235     return false;
    236   } else {
    237     error(read_errno, read_errno, "read failed");
    238   }
    239   abort();
    240 }
    241 
    242 // Measure the size of the jobserver pool by reading from in_fd until it blocks
    243 static int GetJobserverTokens(int in_fd) {
    244   int tokens = 0;
    245   pollfd pollfds[] = {{in_fd, POLLIN, 0}};
    246   int ret;
    247   while ((ret = TEMP_FAILURE_RETRY(poll(pollfds, 1, 0))) != 0) {
    248     if (ret < 0) {
    249       error(errno, errno, "poll failed");
    250     } else if (pollfds[0].revents != POLLIN) {
    251       error(EXIT_FAILURE, 0, "unexpected event %d\n", pollfds[0].revents);
    252     }
    253 
    254     // There is probably a job token in the jobserver pipe.  There is a chance
    255     // another process reads it first, which would cause a blocking read to
    256     // block forever (or until another process put a token back in the pipe).
    257     // The file descriptor can't be set to O_NONBLOCK as that would affect
    258     // all users of the pipe, including the parent make process.
    259     // ReadByteTimeout emulates a non-blocking read on a !O_NONBLOCK socket
    260     // using a SIGALRM that fires after a short timeout.
    261     bool got_token = ReadByteTimeout(in_fd, 10);
    262     if (!got_token) {
    263       // No more tokens
    264       break;
    265     } else {
    266       tokens++;
    267     }
    268   }
    269 
    270   // This process implicitly gets a token, so pool size is measured size + 1
    271   return tokens;
    272 }
    273 
    274 // Return tokens to the jobserver pool.
    275 static void PutJobserverTokens(int out_fd, int tokens) {
    276   // Return all the tokens to the pipe
    277   char buf = '+';
    278   for (int i = 0; i < tokens; i++) {
    279     int ret = TEMP_FAILURE_RETRY(write(out_fd, &buf, 1));
    280     if (ret < 0) {
    281       error(errno, errno, "write failed");
    282     } else if (ret == 0) {
    283       error(EXIT_FAILURE, 0, "EOF on jobserver pipe");
    284     }
    285   }
    286 }
    287 
    288 int main(int argc, char* argv[]) {
    289   int in_fd = -1;
    290   int out_fd = -1;
    291   bool parallel = false;
    292   bool keep_going = false;
    293   bool ninja = false;
    294   int tokens = 0;
    295 
    296   if (argc > 1 && strcmp(argv[1], "--ninja") == 0) {
    297     ninja = true;
    298     argv++;
    299     argc--;
    300   }
    301 
    302   if (argc < 2) {
    303     error(EXIT_FAILURE, 0, "expected command to run");
    304   }
    305 
    306   const char* path = argv[1];
    307   std::vector<char*> args({argv[1]});
    308 
    309   std::vector<std::string> makeflags = ReadMakeflags();
    310   if (ParseMakeflags(makeflags, &in_fd, &out_fd, &parallel, &keep_going)) {
    311     if (in_fd >= 0 && out_fd >= 0) {
    312       CheckFd(in_fd);
    313       CheckFd(out_fd);
    314       fcntl(in_fd, F_SETFD, FD_CLOEXEC);
    315       fcntl(out_fd, F_SETFD, FD_CLOEXEC);
    316       tokens = GetJobserverTokens(in_fd);
    317     }
    318   }
    319 
    320   std::string jarg;
    321   if (parallel) {
    322     if (tokens == 0) {
    323       if (ninja) {
    324         // ninja is parallel by default
    325         jarg = "";
    326       } else {
    327         // make -j with no argument, guess a reasonable parallelism like ninja does
    328         jarg = "-j" + std::to_string(sysconf(_SC_NPROCESSORS_ONLN) + 2);
    329       }
    330     } else {
    331       jarg = "-j" + std::to_string(tokens + 1);
    332     }
    333   }
    334 
    335 
    336   if (ninja) {
    337     if (!parallel) {
    338       // ninja is parallel by default, pass -j1 to disable parallelism if make wasn't parallel
    339       args.push_back(strdup("-j1"));
    340     } else {
    341       if (jarg != "") {
    342         args.push_back(strdup(jarg.c_str()));
    343       }
    344     }
    345     if (keep_going) {
    346       args.push_back(strdup("-k0"));
    347     }
    348   } else {
    349     if (jarg != "") {
    350       args.push_back(strdup(jarg.c_str()));
    351     }
    352   }
    353 
    354   args.insert(args.end(), &argv[2], &argv[argc]);
    355 
    356   args.push_back(nullptr);
    357 
    358   static pid_t pid;
    359 
    360   // Set up signal handlers to forward SIGTERM to child.
    361   // Assume that all other signals are sent to the entire process group,
    362   // and that we'll wait for our child to exit instead of handling them.
    363   struct sigaction action = {};
    364   action.sa_flags = SA_RESTART;
    365   action.sa_handler = [](int signal) {
    366     if (signal == SIGTERM && pid > 0) {
    367       kill(pid, signal);
    368     }
    369   };
    370 
    371   int ret = 0;
    372   if (!ret) ret = sigaction(SIGHUP, &action, NULL);
    373   if (!ret) ret = sigaction(SIGINT, &action, NULL);
    374   if (!ret) ret = sigaction(SIGQUIT, &action, NULL);
    375   if (!ret) ret = sigaction(SIGTERM, &action, NULL);
    376   if (!ret) ret = sigaction(SIGALRM, &action, NULL);
    377   if (ret < 0) {
    378     error(errno, errno, "sigaction failed");
    379   }
    380 
    381   pid = fork();
    382   if (pid < 0) {
    383     error(errno, errno, "fork failed");
    384   } else if (pid == 0) {
    385     // child
    386     unsetenv("MAKEFLAGS");
    387     unsetenv("MAKELEVEL");
    388 
    389     // make 3.81 sets the stack ulimit to unlimited, which may cause problems
    390     // for child processes
    391     struct rlimit rlim{};
    392     if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur == RLIM_INFINITY) {
    393       rlim.rlim_cur = 8*1024*1024;
    394       setrlimit(RLIMIT_STACK, &rlim);
    395     }
    396 
    397     int ret = execvp(path, args.data());
    398     if (ret < 0) {
    399       error(errno, errno, "exec %s failed", path);
    400     }
    401     abort();
    402   }
    403 
    404   // parent
    405 
    406   siginfo_t status = {};
    407   int exit_status = 0;
    408   ret = waitid(P_PID, pid, &status, WEXITED);
    409   if (ret < 0) {
    410     error(errno, errno, "waitpid failed");
    411   } else if (status.si_code == CLD_EXITED) {
    412     exit_status = status.si_status;
    413   } else {
    414     exit_status = -(status.si_status);
    415   }
    416 
    417   if (tokens > 0) {
    418     PutJobserverTokens(out_fd, tokens);
    419   }
    420   exit(exit_status);
    421 }
    422