Home | History | Annotate | Download | only in launcher
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/test/launcher/test_launcher.h"
      6 
      7 #if defined(OS_POSIX)
      8 #include <fcntl.h>
      9 #endif
     10 
     11 #include "base/at_exit.h"
     12 #include "base/bind.h"
     13 #include "base/command_line.h"
     14 #include "base/environment.h"
     15 #include "base/file_util.h"
     16 #include "base/files/file_path.h"
     17 #include "base/files/scoped_file.h"
     18 #include "base/format_macros.h"
     19 #include "base/hash.h"
     20 #include "base/lazy_instance.h"
     21 #include "base/logging.h"
     22 #include "base/memory/scoped_ptr.h"
     23 #include "base/message_loop/message_loop.h"
     24 #include "base/process/kill.h"
     25 #include "base/process/launch.h"
     26 #include "base/strings/string_number_conversions.h"
     27 #include "base/strings/string_split.h"
     28 #include "base/strings/string_util.h"
     29 #include "base/strings/stringize_macros.h"
     30 #include "base/strings/stringprintf.h"
     31 #include "base/strings/utf_string_conversions.h"
     32 #include "base/test/launcher/test_results_tracker.h"
     33 #include "base/test/sequenced_worker_pool_owner.h"
     34 #include "base/test/test_switches.h"
     35 #include "base/test/test_timeouts.h"
     36 #include "base/threading/thread_checker.h"
     37 #include "base/time/time.h"
     38 #include "testing/gtest/include/gtest/gtest.h"
     39 
     40 #if defined(OS_MACOSX)
     41 #include "base/mac/scoped_nsautorelease_pool.h"
     42 #endif
     43 
     44 namespace base {
     45 
     46 // Launches a child process using |command_line|. If the child process is still
     47 // running after |timeout|, it is terminated and |*was_timeout| is set to true.
     48 // Returns exit code of the process.
     49 int LaunchChildTestProcessWithOptions(const CommandLine& command_line,
     50                                       const LaunchOptions& options,
     51                                       bool use_job_objects,
     52                                       base::TimeDelta timeout,
     53                                       bool* was_timeout);
     54 
     55 // See https://groups.google.com/a/chromium.org/d/msg/chromium-dev/nkdTP7sstSc/uT3FaE_sgkAJ .
     56 using ::operator<<;
     57 
     58 // The environment variable name for the total number of test shards.
     59 const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS";
     60 // The environment variable name for the test shard index.
     61 const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
     62 
     63 namespace {
     64 
     65 // Global tag for test runs where the results are incomplete or unreliable
     66 // for any reason, e.g. early exit because of too many broken tests.
     67 const char kUnreliableResultsTag[] = "UNRELIABLE_RESULTS";
     68 
     69 // Maximum time of no output after which we print list of processes still
     70 // running. This deliberately doesn't use TestTimeouts (which is otherwise
     71 // a recommended solution), because they can be increased. This would defeat
     72 // the purpose of this timeout, which is 1) to avoid buildbot "no output for
     73 // X seconds" timeout killing the process 2) help communicate status of
     74 // the test launcher to people looking at the output (no output for a long
     75 // time is mysterious and gives no info about what is happening) 3) help
     76 // debugging in case the process hangs anyway.
     77 const int kOutputTimeoutSeconds = 15;
     78 
     79 // Limit of output snippet lines when printing to stdout.
     80 // Avoids flooding the logs with amount of output that gums up
     81 // the infrastructure.
     82 const size_t kOutputSnippetLinesLimit = 50;
     83 
     84 // Set of live launch test processes with corresponding lock (it is allowed
     85 // for callers to launch processes on different threads).
     86 LazyInstance<std::map<ProcessHandle, CommandLine> > g_live_processes
     87     = LAZY_INSTANCE_INITIALIZER;
     88 LazyInstance<Lock> g_live_processes_lock = LAZY_INSTANCE_INITIALIZER;
     89 
     90 #if defined(OS_POSIX)
     91 // Self-pipe that makes it possible to do complex shutdown handling
     92 // outside of the signal handler.
     93 int g_shutdown_pipe[2] = { -1, -1 };
     94 
     95 void ShutdownPipeSignalHandler(int signal) {
     96   HANDLE_EINTR(write(g_shutdown_pipe[1], "q", 1));
     97 }
     98 
     99 void KillSpawnedTestProcesses() {
    100   // Keep the lock until exiting the process to prevent further processes
    101   // from being spawned.
    102   AutoLock lock(g_live_processes_lock.Get());
    103 
    104   fprintf(stdout,
    105           "Sending SIGTERM to %" PRIuS " child processes... ",
    106           g_live_processes.Get().size());
    107   fflush(stdout);
    108 
    109   for (std::map<ProcessHandle, CommandLine>::iterator i =
    110            g_live_processes.Get().begin();
    111        i != g_live_processes.Get().end();
    112        ++i) {
    113     // Send the signal to entire process group.
    114     kill((-1) * (i->first), SIGTERM);
    115   }
    116 
    117   fprintf(stdout,
    118           "done.\nGiving processes a chance to terminate cleanly... ");
    119   fflush(stdout);
    120 
    121   PlatformThread::Sleep(TimeDelta::FromMilliseconds(500));
    122 
    123   fprintf(stdout, "done.\n");
    124   fflush(stdout);
    125 
    126   fprintf(stdout,
    127           "Sending SIGKILL to %" PRIuS " child processes... ",
    128           g_live_processes.Get().size());
    129   fflush(stdout);
    130 
    131   for (std::map<ProcessHandle, CommandLine>::iterator i =
    132            g_live_processes.Get().begin();
    133        i != g_live_processes.Get().end();
    134        ++i) {
    135     // Send the signal to entire process group.
    136     kill((-1) * (i->first), SIGKILL);
    137   }
    138 
    139   fprintf(stdout, "done.\n");
    140   fflush(stdout);
    141 }
    142 
    143 // I/O watcher for the reading end of the self-pipe above.
    144 // Terminates any launched child processes and exits the process.
    145 class SignalFDWatcher : public MessageLoopForIO::Watcher {
    146  public:
    147   SignalFDWatcher() {
    148   }
    149 
    150   virtual void OnFileCanReadWithoutBlocking(int fd) OVERRIDE {
    151     fprintf(stdout, "\nCaught signal. Killing spawned test processes...\n");
    152     fflush(stdout);
    153 
    154     KillSpawnedTestProcesses();
    155 
    156     // The signal would normally kill the process, so exit now.
    157     exit(1);
    158   }
    159 
    160   virtual void OnFileCanWriteWithoutBlocking(int fd) OVERRIDE {
    161     NOTREACHED();
    162   }
    163 
    164  private:
    165   DISALLOW_COPY_AND_ASSIGN(SignalFDWatcher);
    166 };
    167 #endif  // defined(OS_POSIX)
    168 
    169 // Parses the environment variable var as an Int32.  If it is unset, returns
    170 // true.  If it is set, unsets it then converts it to Int32 before
    171 // returning it in |result|.  Returns true on success.
    172 bool TakeInt32FromEnvironment(const char* const var, int32* result) {
    173   scoped_ptr<Environment> env(Environment::Create());
    174   std::string str_val;
    175 
    176   if (!env->GetVar(var, &str_val))
    177     return true;
    178 
    179   if (!env->UnSetVar(var)) {
    180     LOG(ERROR) << "Invalid environment: we could not unset " << var << ".\n";
    181     return false;
    182   }
    183 
    184   if (!StringToInt(str_val, result)) {
    185     LOG(ERROR) << "Invalid environment: " << var << " is not an integer.\n";
    186     return false;
    187   }
    188 
    189   return true;
    190 }
    191 
    192 // Unsets the environment variable |name| and returns true on success.
    193 // Also returns true if the variable just doesn't exist.
    194 bool UnsetEnvironmentVariableIfExists(const std::string& name) {
    195   scoped_ptr<Environment> env(Environment::Create());
    196   std::string str_val;
    197 
    198   if (!env->GetVar(name.c_str(), &str_val))
    199     return true;
    200 
    201   return env->UnSetVar(name.c_str());
    202 }
    203 
    204 // Returns true if bot mode has been requested, i.e. defaults optimized
    205 // for continuous integration bots. This way developers don't have to remember
    206 // special command-line flags.
    207 bool BotModeEnabled() {
    208   scoped_ptr<Environment> env(Environment::Create());
    209   return CommandLine::ForCurrentProcess()->HasSwitch(
    210       switches::kTestLauncherBotMode) ||
    211       env->HasVar("CHROMIUM_TEST_LAUNCHER_BOT_MODE");
    212 }
    213 
    214 void RunCallback(
    215     const TestLauncher::LaunchChildGTestProcessCallback& callback,
    216     int exit_code,
    217     const TimeDelta& elapsed_time,
    218     bool was_timeout,
    219     const std::string& output) {
    220   callback.Run(exit_code, elapsed_time, was_timeout, output);
    221 }
    222 
    223 void DoLaunchChildTestProcess(
    224     const CommandLine& command_line,
    225     base::TimeDelta timeout,
    226     bool use_job_objects,
    227     bool redirect_stdio,
    228     scoped_refptr<MessageLoopProxy> message_loop_proxy,
    229     const TestLauncher::LaunchChildGTestProcessCallback& callback) {
    230   TimeTicks start_time = TimeTicks::Now();
    231 
    232   // Redirect child process output to a file.
    233   base::FilePath output_file;
    234   CHECK(base::CreateTemporaryFile(&output_file));
    235 
    236   LaunchOptions options;
    237 #if defined(OS_WIN)
    238   win::ScopedHandle handle;
    239 
    240   if (redirect_stdio) {
    241     // Make the file handle inheritable by the child.
    242     SECURITY_ATTRIBUTES sa_attr;
    243     sa_attr.nLength = sizeof(SECURITY_ATTRIBUTES);
    244     sa_attr.lpSecurityDescriptor = NULL;
    245     sa_attr.bInheritHandle = TRUE;
    246 
    247     handle.Set(CreateFile(output_file.value().c_str(),
    248                           GENERIC_WRITE,
    249                           FILE_SHARE_READ | FILE_SHARE_DELETE,
    250                           &sa_attr,
    251                           OPEN_EXISTING,
    252                           FILE_ATTRIBUTE_TEMPORARY,
    253                           NULL));
    254     CHECK(handle.IsValid());
    255     options.inherit_handles = true;
    256     options.stdin_handle = INVALID_HANDLE_VALUE;
    257     options.stdout_handle = handle.Get();
    258     options.stderr_handle = handle.Get();
    259   }
    260 #elif defined(OS_POSIX)
    261   options.new_process_group = true;
    262 
    263   base::FileHandleMappingVector fds_mapping;
    264   base::ScopedFD output_file_fd;
    265 
    266   if (redirect_stdio) {
    267     output_file_fd.reset(open(output_file.value().c_str(), O_RDWR));
    268     CHECK(output_file_fd.is_valid());
    269 
    270     fds_mapping.push_back(std::make_pair(output_file_fd.get(), STDOUT_FILENO));
    271     fds_mapping.push_back(std::make_pair(output_file_fd.get(), STDERR_FILENO));
    272     options.fds_to_remap = &fds_mapping;
    273   }
    274 #endif
    275 
    276   bool was_timeout = false;
    277   int exit_code = LaunchChildTestProcessWithOptions(
    278       command_line, options, use_job_objects, timeout, &was_timeout);
    279 
    280   if (redirect_stdio) {
    281 #if defined(OS_WIN)
    282     FlushFileBuffers(handle.Get());
    283     handle.Close();
    284 #elif defined(OS_POSIX)
    285     output_file_fd.reset();
    286 #endif
    287   }
    288 
    289   std::string output_file_contents;
    290   CHECK(base::ReadFileToString(output_file, &output_file_contents));
    291 
    292   if (!base::DeleteFile(output_file, false)) {
    293     // This needs to be non-fatal at least for Windows.
    294     LOG(WARNING) << "Failed to delete " << output_file.AsUTF8Unsafe();
    295   }
    296 
    297   // Run target callback on the thread it was originating from, not on
    298   // a worker pool thread.
    299   message_loop_proxy->PostTask(
    300       FROM_HERE,
    301       Bind(&RunCallback,
    302            callback,
    303            exit_code,
    304            TimeTicks::Now() - start_time,
    305            was_timeout,
    306            output_file_contents));
    307 }
    308 
    309 }  // namespace
    310 
    311 const char kGTestFilterFlag[] = "gtest_filter";
    312 const char kGTestHelpFlag[]   = "gtest_help";
    313 const char kGTestListTestsFlag[] = "gtest_list_tests";
    314 const char kGTestRepeatFlag[] = "gtest_repeat";
    315 const char kGTestRunDisabledTestsFlag[] = "gtest_also_run_disabled_tests";
    316 const char kGTestOutputFlag[] = "gtest_output";
    317 
    318 TestLauncherDelegate::~TestLauncherDelegate() {
    319 }
    320 
    321 TestLauncher::TestLauncher(TestLauncherDelegate* launcher_delegate,
    322                            size_t parallel_jobs)
    323     : launcher_delegate_(launcher_delegate),
    324       total_shards_(1),
    325       shard_index_(0),
    326       cycles_(1),
    327       test_started_count_(0),
    328       test_finished_count_(0),
    329       test_success_count_(0),
    330       test_broken_count_(0),
    331       retry_count_(0),
    332       retry_limit_(0),
    333       run_result_(true),
    334       watchdog_timer_(FROM_HERE,
    335                       TimeDelta::FromSeconds(kOutputTimeoutSeconds),
    336                       this,
    337                       &TestLauncher::OnOutputTimeout),
    338       parallel_jobs_(parallel_jobs) {
    339   if (BotModeEnabled()) {
    340     fprintf(stdout,
    341             "Enabling defaults optimized for continuous integration bots.\n");
    342     fflush(stdout);
    343 
    344     // Enable test retries by default for bots. This can be still overridden
    345     // from command line using --test-launcher-retry-limit flag.
    346     retry_limit_ = 3;
    347   } else {
    348     // Default to serial test execution if not running on a bot. This makes it
    349     // possible to disable stdio redirection and can still be overridden with
    350     // --test-launcher-jobs flag.
    351     parallel_jobs_ = 1;
    352   }
    353 }
    354 
    355 TestLauncher::~TestLauncher() {
    356   if (worker_pool_owner_)
    357     worker_pool_owner_->pool()->Shutdown();
    358 }
    359 
    360 bool TestLauncher::Run() {
    361   if (!Init())
    362     return false;
    363 
    364   // Value of |cycles_| changes after each iteration. Keep track of the
    365   // original value.
    366   int requested_cycles = cycles_;
    367 
    368 #if defined(OS_POSIX)
    369   CHECK_EQ(0, pipe(g_shutdown_pipe));
    370 
    371   struct sigaction action;
    372   memset(&action, 0, sizeof(action));
    373   sigemptyset(&action.sa_mask);
    374   action.sa_handler = &ShutdownPipeSignalHandler;
    375 
    376   CHECK_EQ(0, sigaction(SIGINT, &action, NULL));
    377   CHECK_EQ(0, sigaction(SIGQUIT, &action, NULL));
    378   CHECK_EQ(0, sigaction(SIGTERM, &action, NULL));
    379 
    380   MessageLoopForIO::FileDescriptorWatcher controller;
    381   SignalFDWatcher watcher;
    382 
    383   CHECK(MessageLoopForIO::current()->WatchFileDescriptor(
    384             g_shutdown_pipe[0],
    385             true,
    386             MessageLoopForIO::WATCH_READ,
    387             &controller,
    388             &watcher));
    389 #endif  // defined(OS_POSIX)
    390 
    391   // Start the watchdog timer.
    392   watchdog_timer_.Reset();
    393 
    394   MessageLoop::current()->PostTask(
    395       FROM_HERE,
    396       Bind(&TestLauncher::RunTestIteration, Unretained(this)));
    397 
    398   MessageLoop::current()->Run();
    399 
    400   if (requested_cycles != 1)
    401     results_tracker_.PrintSummaryOfAllIterations();
    402 
    403   MaybeSaveSummaryAsJSON();
    404 
    405   return run_result_;
    406 }
    407 
    408 void TestLauncher::LaunchChildGTestProcess(
    409     const CommandLine& command_line,
    410     const std::string& wrapper,
    411     base::TimeDelta timeout,
    412     bool use_job_objects,
    413     const LaunchChildGTestProcessCallback& callback) {
    414   DCHECK(thread_checker_.CalledOnValidThread());
    415 
    416   // Record the exact command line used to launch the child.
    417   CommandLine new_command_line(
    418       PrepareCommandLineForGTest(command_line, wrapper));
    419 
    420   // When running in parallel mode we need to redirect stdio to avoid mixed-up
    421   // output. We also always redirect on the bots to get the test output into
    422   // JSON summary.
    423   bool redirect_stdio = (parallel_jobs_ > 1) || BotModeEnabled();
    424 
    425   worker_pool_owner_->pool()->PostWorkerTask(
    426       FROM_HERE,
    427       Bind(&DoLaunchChildTestProcess,
    428            new_command_line,
    429            timeout,
    430            use_job_objects,
    431            redirect_stdio,
    432            MessageLoopProxy::current(),
    433            Bind(&TestLauncher::OnLaunchTestProcessFinished,
    434                 Unretained(this),
    435                 callback)));
    436 }
    437 
    438 void TestLauncher::OnTestFinished(const TestResult& result) {
    439   ++test_finished_count_;
    440 
    441   bool print_snippet = false;
    442   std::string print_test_stdio("auto");
    443   if (CommandLine::ForCurrentProcess()->HasSwitch(
    444           switches::kTestLauncherPrintTestStdio)) {
    445     print_test_stdio = CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
    446         switches::kTestLauncherPrintTestStdio);
    447   }
    448   if (print_test_stdio == "auto") {
    449     print_snippet = (result.status != TestResult::TEST_SUCCESS);
    450   } else if (print_test_stdio == "always") {
    451     print_snippet = true;
    452   } else if (print_test_stdio == "never") {
    453     print_snippet = false;
    454   } else {
    455     LOG(WARNING) << "Invalid value of " << switches::kTestLauncherPrintTestStdio
    456                  << ": " << print_test_stdio;
    457   }
    458   if (print_snippet) {
    459     std::vector<std::string> snippet_lines;
    460     SplitString(result.output_snippet, '\n', &snippet_lines);
    461     if (snippet_lines.size() > kOutputSnippetLinesLimit) {
    462       size_t truncated_size = snippet_lines.size() - kOutputSnippetLinesLimit;
    463       snippet_lines.erase(
    464           snippet_lines.begin(),
    465           snippet_lines.begin() + truncated_size);
    466       snippet_lines.insert(snippet_lines.begin(), "<truncated>");
    467     }
    468     fprintf(stdout, "%s", JoinString(snippet_lines, "\n").c_str());
    469     fflush(stdout);
    470   }
    471 
    472   if (result.status == TestResult::TEST_SUCCESS) {
    473     ++test_success_count_;
    474   } else {
    475     tests_to_retry_.insert(result.full_name);
    476   }
    477 
    478   results_tracker_.AddTestResult(result);
    479 
    480   // TODO(phajdan.jr): Align counter (padding).
    481   std::string status_line(
    482       StringPrintf("[%" PRIuS "/%" PRIuS "] %s ",
    483                    test_finished_count_,
    484                    test_started_count_,
    485                    result.full_name.c_str()));
    486   if (result.completed()) {
    487     status_line.append(StringPrintf("(%" PRId64 " ms)",
    488                                     result.elapsed_time.InMilliseconds()));
    489   } else if (result.status == TestResult::TEST_TIMEOUT) {
    490     status_line.append("(TIMED OUT)");
    491   } else if (result.status == TestResult::TEST_CRASH) {
    492     status_line.append("(CRASHED)");
    493   } else if (result.status == TestResult::TEST_SKIPPED) {
    494     status_line.append("(SKIPPED)");
    495   } else if (result.status == TestResult::TEST_UNKNOWN) {
    496     status_line.append("(UNKNOWN)");
    497   } else {
    498     // Fail very loudly so it's not ignored.
    499     CHECK(false) << "Unhandled test result status: " << result.status;
    500   }
    501   fprintf(stdout, "%s\n", status_line.c_str());
    502   fflush(stdout);
    503 
    504   // We just printed a status line, reset the watchdog timer.
    505   watchdog_timer_.Reset();
    506 
    507   // Do not waste time on timeouts. We include tests with unknown results here
    508   // because sometimes (e.g. hang in between unit tests) that's how a timeout
    509   // gets reported.
    510   if (result.status == TestResult::TEST_TIMEOUT ||
    511       result.status == TestResult::TEST_UNKNOWN) {
    512     test_broken_count_++;
    513   }
    514   size_t broken_threshold =
    515       std::max(static_cast<size_t>(20), test_started_count_ / 10);
    516   if (test_broken_count_ >= broken_threshold) {
    517     fprintf(stdout, "Too many badly broken tests (%" PRIuS "), exiting now.\n",
    518             test_broken_count_);
    519     fflush(stdout);
    520 
    521 #if defined(OS_POSIX)
    522     KillSpawnedTestProcesses();
    523 #endif  // defined(OS_POSIX)
    524 
    525     results_tracker_.AddGlobalTag("BROKEN_TEST_EARLY_EXIT");
    526     results_tracker_.AddGlobalTag(kUnreliableResultsTag);
    527     MaybeSaveSummaryAsJSON();
    528 
    529     exit(1);
    530   }
    531 
    532   if (test_finished_count_ != test_started_count_)
    533     return;
    534 
    535   if (tests_to_retry_.empty() || retry_count_ >= retry_limit_) {
    536     OnTestIterationFinished();
    537     return;
    538   }
    539 
    540   if (tests_to_retry_.size() >= broken_threshold) {
    541     fprintf(stdout,
    542             "Too many failing tests (%" PRIuS "), skipping retries.\n",
    543             tests_to_retry_.size());
    544     fflush(stdout);
    545 
    546     results_tracker_.AddGlobalTag("BROKEN_TEST_SKIPPED_RETRIES");
    547     results_tracker_.AddGlobalTag(kUnreliableResultsTag);
    548 
    549     OnTestIterationFinished();
    550     return;
    551   }
    552 
    553   retry_count_++;
    554 
    555   std::vector<std::string> test_names(tests_to_retry_.begin(),
    556                                       tests_to_retry_.end());
    557 
    558   tests_to_retry_.clear();
    559 
    560   size_t retry_started_count = launcher_delegate_->RetryTests(this, test_names);
    561   if (retry_started_count == 0) {
    562     // Signal failure, but continue to run all requested test iterations.
    563     // With the summary of all iterations at the end this is a good default.
    564     run_result_ = false;
    565 
    566     OnTestIterationFinished();
    567     return;
    568   }
    569 
    570   fprintf(stdout, "Retrying %" PRIuS " test%s (retry #%" PRIuS ")\n",
    571           retry_started_count,
    572           retry_started_count > 1 ? "s" : "",
    573           retry_count_);
    574   fflush(stdout);
    575 
    576   test_started_count_ += retry_started_count;
    577 }
    578 
    579 bool TestLauncher::Init() {
    580   const CommandLine* command_line = CommandLine::ForCurrentProcess();
    581 
    582   // Initialize sharding. Command line takes precedence over legacy environment
    583   // variables.
    584   if (command_line->HasSwitch(switches::kTestLauncherTotalShards) &&
    585       command_line->HasSwitch(switches::kTestLauncherShardIndex)) {
    586     if (!StringToInt(
    587             command_line->GetSwitchValueASCII(
    588                 switches::kTestLauncherTotalShards),
    589             &total_shards_)) {
    590       LOG(ERROR) << "Invalid value for " << switches::kTestLauncherTotalShards;
    591       return false;
    592     }
    593     if (!StringToInt(
    594             command_line->GetSwitchValueASCII(
    595                 switches::kTestLauncherShardIndex),
    596             &shard_index_)) {
    597       LOG(ERROR) << "Invalid value for " << switches::kTestLauncherShardIndex;
    598       return false;
    599     }
    600     fprintf(stdout,
    601             "Using sharding settings from command line. This is shard %d/%d\n",
    602             shard_index_, total_shards_);
    603     fflush(stdout);
    604   } else {
    605     if (!TakeInt32FromEnvironment(kTestTotalShards, &total_shards_))
    606       return false;
    607     if (!TakeInt32FromEnvironment(kTestShardIndex, &shard_index_))
    608       return false;
    609     fprintf(stdout,
    610             "Using sharding settings from environment. This is shard %d/%d\n",
    611             shard_index_, total_shards_);
    612     fflush(stdout);
    613   }
    614   if (shard_index_ < 0 ||
    615       total_shards_ < 0 ||
    616       shard_index_ >= total_shards_) {
    617     LOG(ERROR) << "Invalid sharding settings: we require 0 <= "
    618                << kTestShardIndex << " < " << kTestTotalShards
    619                << ", but you have " << kTestShardIndex << "=" << shard_index_
    620                << ", " << kTestTotalShards << "=" << total_shards_ << ".\n";
    621     return false;
    622   }
    623 
    624   // Make sure we don't pass any sharding-related environment to the child
    625   // processes. This test launcher implements the sharding completely.
    626   CHECK(UnsetEnvironmentVariableIfExists("GTEST_TOTAL_SHARDS"));
    627   CHECK(UnsetEnvironmentVariableIfExists("GTEST_SHARD_INDEX"));
    628 
    629   if (command_line->HasSwitch(kGTestRepeatFlag) &&
    630       !StringToInt(command_line->GetSwitchValueASCII(kGTestRepeatFlag),
    631                    &cycles_)) {
    632     LOG(ERROR) << "Invalid value for " << kGTestRepeatFlag;
    633     return false;
    634   }
    635 
    636   if (CommandLine::ForCurrentProcess()->HasSwitch(
    637           switches::kTestLauncherRetryLimit)) {
    638     int retry_limit = -1;
    639     if (!StringToInt(CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
    640                          switches::kTestLauncherRetryLimit), &retry_limit) ||
    641         retry_limit < 0) {
    642       LOG(ERROR) << "Invalid value for " << switches::kTestLauncherRetryLimit;
    643       return false;
    644     }
    645 
    646     retry_limit_ = retry_limit;
    647   }
    648 
    649   if (CommandLine::ForCurrentProcess()->HasSwitch(
    650           switches::kTestLauncherJobs)) {
    651     int jobs = -1;
    652     if (!StringToInt(CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
    653                          switches::kTestLauncherJobs), &jobs) ||
    654         jobs < 0) {
    655       LOG(ERROR) << "Invalid value for " << switches::kTestLauncherJobs;
    656       return false;
    657     }
    658 
    659     parallel_jobs_ = jobs;
    660   }
    661   fprintf(stdout, "Using %" PRIuS " parallel jobs.\n", parallel_jobs_);
    662   fflush(stdout);
    663   worker_pool_owner_.reset(
    664       new SequencedWorkerPoolOwner(parallel_jobs_, "test_launcher"));
    665 
    666   if (command_line->HasSwitch(switches::kTestLauncherFilterFile) &&
    667       command_line->HasSwitch(kGTestFilterFlag)) {
    668     LOG(ERROR) << "Only one of --test-launcher-filter-file and --gtest_filter "
    669                << "at a time is allowed.";
    670     return false;
    671   }
    672 
    673   if (command_line->HasSwitch(switches::kTestLauncherFilterFile)) {
    674     std::string filter;
    675     if (!ReadFileToString(
    676             command_line->GetSwitchValuePath(switches::kTestLauncherFilterFile),
    677             &filter)) {
    678       LOG(ERROR) << "Failed to read the filter file.";
    679       return false;
    680     }
    681 
    682     std::vector<std::string> filter_lines;
    683     SplitString(filter, '\n', &filter_lines);
    684     for (size_t i = 0; i < filter_lines.size(); i++) {
    685       if (filter_lines[i].empty())
    686         continue;
    687 
    688       if (filter_lines[i][0] == '-')
    689         negative_test_filter_.push_back(filter_lines[i].substr(1));
    690       else
    691         positive_test_filter_.push_back(filter_lines[i]);
    692     }
    693   } else {
    694     // Split --gtest_filter at '-', if there is one, to separate into
    695     // positive filter and negative filter portions.
    696     std::string filter = command_line->GetSwitchValueASCII(kGTestFilterFlag);
    697     size_t dash_pos = filter.find('-');
    698     if (dash_pos == std::string::npos) {
    699       SplitString(filter, ':', &positive_test_filter_);
    700     } else {
    701       // Everything up to the dash.
    702       SplitString(filter.substr(0, dash_pos), ':', &positive_test_filter_);
    703 
    704       // Everything after the dash.
    705       SplitString(filter.substr(dash_pos + 1), ':', &negative_test_filter_);
    706     }
    707   }
    708 
    709   if (!results_tracker_.Init(*command_line)) {
    710     LOG(ERROR) << "Failed to initialize test results tracker.";
    711     return 1;
    712   }
    713 
    714 #if defined(NDEBUG)
    715   results_tracker_.AddGlobalTag("MODE_RELEASE");
    716 #else
    717   results_tracker_.AddGlobalTag("MODE_DEBUG");
    718 #endif
    719 
    720   // Operating systems (sorted alphabetically).
    721   // Note that they can deliberately overlap, e.g. OS_LINUX is a subset
    722   // of OS_POSIX.
    723 #if defined(OS_ANDROID)
    724   results_tracker_.AddGlobalTag("OS_ANDROID");
    725 #endif
    726 
    727 #if defined(OS_BSD)
    728   results_tracker_.AddGlobalTag("OS_BSD");
    729 #endif
    730 
    731 #if defined(OS_FREEBSD)
    732   results_tracker_.AddGlobalTag("OS_FREEBSD");
    733 #endif
    734 
    735 #if defined(OS_IOS)
    736   results_tracker_.AddGlobalTag("OS_IOS");
    737 #endif
    738 
    739 #if defined(OS_LINUX)
    740   results_tracker_.AddGlobalTag("OS_LINUX");
    741 #endif
    742 
    743 #if defined(OS_MACOSX)
    744   results_tracker_.AddGlobalTag("OS_MACOSX");
    745 #endif
    746 
    747 #if defined(OS_NACL)
    748   results_tracker_.AddGlobalTag("OS_NACL");
    749 #endif
    750 
    751 #if defined(OS_OPENBSD)
    752   results_tracker_.AddGlobalTag("OS_OPENBSD");
    753 #endif
    754 
    755 #if defined(OS_POSIX)
    756   results_tracker_.AddGlobalTag("OS_POSIX");
    757 #endif
    758 
    759 #if defined(OS_SOLARIS)
    760   results_tracker_.AddGlobalTag("OS_SOLARIS");
    761 #endif
    762 
    763 #if defined(OS_WIN)
    764   results_tracker_.AddGlobalTag("OS_WIN");
    765 #endif
    766 
    767   // CPU-related tags.
    768 #if defined(ARCH_CPU_32_BITS)
    769   results_tracker_.AddGlobalTag("CPU_32_BITS");
    770 #endif
    771 
    772 #if defined(ARCH_CPU_64_BITS)
    773   results_tracker_.AddGlobalTag("CPU_64_BITS");
    774 #endif
    775 
    776   return true;
    777 }
    778 
    779 void TestLauncher::RunTests() {
    780   testing::UnitTest* const unit_test = testing::UnitTest::GetInstance();
    781 
    782   std::vector<std::string> test_names;
    783 
    784   for (int i = 0; i < unit_test->total_test_case_count(); ++i) {
    785     const testing::TestCase* test_case = unit_test->GetTestCase(i);
    786     for (int j = 0; j < test_case->total_test_count(); ++j) {
    787       const testing::TestInfo* test_info = test_case->GetTestInfo(j);
    788       std::string test_name = test_info->test_case_name();
    789       test_name.append(".");
    790       test_name.append(test_info->name());
    791 
    792       results_tracker_.AddTest(test_name);
    793 
    794       const CommandLine* command_line = CommandLine::ForCurrentProcess();
    795       if (test_name.find("DISABLED") != std::string::npos) {
    796         results_tracker_.AddDisabledTest(test_name);
    797 
    798         // Skip disabled tests unless explicitly requested.
    799         if (!command_line->HasSwitch(kGTestRunDisabledTestsFlag))
    800           continue;
    801       }
    802 
    803       // Skip the test that doesn't match the filter (if given).
    804       if (!positive_test_filter_.empty()) {
    805         bool found = false;
    806         for (size_t k = 0; k < positive_test_filter_.size(); ++k) {
    807           if (MatchPattern(test_name, positive_test_filter_[k])) {
    808             found = true;
    809             break;
    810           }
    811         }
    812 
    813         if (!found)
    814           continue;
    815       }
    816       bool excluded = false;
    817       for (size_t k = 0; k < negative_test_filter_.size(); ++k) {
    818         if (MatchPattern(test_name, negative_test_filter_[k])) {
    819           excluded = true;
    820           break;
    821         }
    822       }
    823       if (excluded)
    824         continue;
    825 
    826       if (!launcher_delegate_->ShouldRunTest(test_case, test_info))
    827         continue;
    828 
    829       if (base::Hash(test_name) % total_shards_ !=
    830           static_cast<uint32>(shard_index_)) {
    831         continue;
    832       }
    833 
    834       test_names.push_back(test_name);
    835     }
    836   }
    837 
    838   test_started_count_ = launcher_delegate_->RunTests(this, test_names);
    839 
    840   if (test_started_count_ == 0) {
    841     fprintf(stdout, "0 tests run\n");
    842     fflush(stdout);
    843 
    844     // No tests have actually been started, so kick off the next iteration.
    845     MessageLoop::current()->PostTask(
    846         FROM_HERE,
    847         Bind(&TestLauncher::RunTestIteration, Unretained(this)));
    848   }
    849 }
    850 
    851 void TestLauncher::RunTestIteration() {
    852   if (cycles_ == 0) {
    853     MessageLoop::current()->Quit();
    854     return;
    855   }
    856 
    857   // Special value "-1" means "repeat indefinitely".
    858   cycles_ = (cycles_ == -1) ? cycles_ : cycles_ - 1;
    859 
    860   test_started_count_ = 0;
    861   test_finished_count_ = 0;
    862   test_success_count_ = 0;
    863   test_broken_count_ = 0;
    864   retry_count_ = 0;
    865   tests_to_retry_.clear();
    866   results_tracker_.OnTestIterationStarting();
    867 
    868   MessageLoop::current()->PostTask(
    869       FROM_HERE, Bind(&TestLauncher::RunTests, Unretained(this)));
    870 }
    871 
    872 void TestLauncher::MaybeSaveSummaryAsJSON() {
    873   const CommandLine* command_line = CommandLine::ForCurrentProcess();
    874   if (command_line->HasSwitch(switches::kTestLauncherSummaryOutput)) {
    875     FilePath summary_path(command_line->GetSwitchValuePath(
    876                               switches::kTestLauncherSummaryOutput));
    877     if (!results_tracker_.SaveSummaryAsJSON(summary_path)) {
    878       LOG(ERROR) << "Failed to save test launcher output summary.";
    879     }
    880   }
    881 }
    882 
    883 void TestLauncher::OnLaunchTestProcessFinished(
    884     const LaunchChildGTestProcessCallback& callback,
    885     int exit_code,
    886     const TimeDelta& elapsed_time,
    887     bool was_timeout,
    888     const std::string& output) {
    889   DCHECK(thread_checker_.CalledOnValidThread());
    890 
    891   callback.Run(exit_code, elapsed_time, was_timeout, output);
    892 }
    893 
    894 void TestLauncher::OnTestIterationFinished() {
    895   TestResultsTracker::TestStatusMap tests_by_status(
    896       results_tracker_.GetTestStatusMapForCurrentIteration());
    897   if (!tests_by_status[TestResult::TEST_UNKNOWN].empty())
    898     results_tracker_.AddGlobalTag(kUnreliableResultsTag);
    899 
    900   // When we retry tests, success is determined by having nothing more
    901   // to retry (everything eventually passed), as opposed to having
    902   // no failures at all.
    903   if (tests_to_retry_.empty()) {
    904     fprintf(stdout, "SUCCESS: all tests passed.\n");
    905     fflush(stdout);
    906   } else {
    907     // Signal failure, but continue to run all requested test iterations.
    908     // With the summary of all iterations at the end this is a good default.
    909     run_result_ = false;
    910   }
    911 
    912   results_tracker_.PrintSummaryOfCurrentIteration();
    913 
    914   // Kick off the next iteration.
    915   MessageLoop::current()->PostTask(
    916       FROM_HERE,
    917       Bind(&TestLauncher::RunTestIteration, Unretained(this)));
    918 }
    919 
    920 void TestLauncher::OnOutputTimeout() {
    921   DCHECK(thread_checker_.CalledOnValidThread());
    922 
    923   AutoLock lock(g_live_processes_lock.Get());
    924 
    925   fprintf(stdout, "Still waiting for the following processes to finish:\n");
    926 
    927   for (std::map<ProcessHandle, CommandLine>::iterator i =
    928            g_live_processes.Get().begin();
    929        i != g_live_processes.Get().end();
    930        ++i) {
    931 #if defined(OS_WIN)
    932     fwprintf(stdout, L"\t%s\n", i->second.GetCommandLineString().c_str());
    933 #else
    934     fprintf(stdout, "\t%s\n", i->second.GetCommandLineString().c_str());
    935 #endif
    936   }
    937 
    938   fflush(stdout);
    939 
    940   // Arm the timer again - otherwise it would fire only once.
    941   watchdog_timer_.Reset();
    942 }
    943 
    944 std::string GetTestOutputSnippet(const TestResult& result,
    945                                  const std::string& full_output) {
    946   size_t run_pos = full_output.find(std::string("[ RUN      ] ") +
    947                                     result.full_name);
    948   if (run_pos == std::string::npos)
    949     return std::string();
    950 
    951   size_t end_pos = full_output.find(std::string("[  FAILED  ] ") +
    952                                     result.full_name,
    953                                     run_pos);
    954   // Only clip the snippet to the "OK" message if the test really
    955   // succeeded. It still might have e.g. crashed after printing it.
    956   if (end_pos == std::string::npos &&
    957       result.status == TestResult::TEST_SUCCESS) {
    958     end_pos = full_output.find(std::string("[       OK ] ") +
    959                                result.full_name,
    960                                run_pos);
    961   }
    962   if (end_pos != std::string::npos) {
    963     size_t newline_pos = full_output.find("\n", end_pos);
    964     if (newline_pos != std::string::npos)
    965       end_pos = newline_pos + 1;
    966   }
    967 
    968   std::string snippet(full_output.substr(run_pos));
    969   if (end_pos != std::string::npos)
    970     snippet = full_output.substr(run_pos, end_pos - run_pos);
    971 
    972   return snippet;
    973 }
    974 
    975 CommandLine PrepareCommandLineForGTest(const CommandLine& command_line,
    976                                        const std::string& wrapper) {
    977   CommandLine new_command_line(command_line.GetProgram());
    978   CommandLine::SwitchMap switches = command_line.GetSwitches();
    979 
    980   // Strip out gtest_repeat flag - this is handled by the launcher process.
    981   switches.erase(kGTestRepeatFlag);
    982 
    983   // Don't try to write the final XML report in child processes.
    984   switches.erase(kGTestOutputFlag);
    985 
    986   for (CommandLine::SwitchMap::const_iterator iter = switches.begin();
    987        iter != switches.end(); ++iter) {
    988     new_command_line.AppendSwitchNative((*iter).first, (*iter).second);
    989   }
    990 
    991   // Prepend wrapper after last CommandLine quasi-copy operation. CommandLine
    992   // does not really support removing switches well, and trying to do that
    993   // on a CommandLine with a wrapper is known to break.
    994   // TODO(phajdan.jr): Give it a try to support CommandLine removing switches.
    995 #if defined(OS_WIN)
    996   new_command_line.PrependWrapper(ASCIIToWide(wrapper));
    997 #elif defined(OS_POSIX)
    998   new_command_line.PrependWrapper(wrapper);
    999 #endif
   1000 
   1001   return new_command_line;
   1002 }
   1003 
   1004 // TODO(phajdan.jr): Move to anonymous namespace.
   1005 int LaunchChildTestProcessWithOptions(const CommandLine& command_line,
   1006                                       const LaunchOptions& options,
   1007                                       bool use_job_objects,
   1008                                       base::TimeDelta timeout,
   1009                                       bool* was_timeout) {
   1010 #if defined(OS_POSIX)
   1011   // Make sure an option we rely on is present - see LaunchChildGTestProcess.
   1012   DCHECK(options.new_process_group);
   1013 #endif
   1014 
   1015   LaunchOptions new_options(options);
   1016 
   1017 #if defined(OS_WIN)
   1018   DCHECK(!new_options.job_handle);
   1019 
   1020   win::ScopedHandle job_handle;
   1021   if (use_job_objects) {
   1022     job_handle.Set(CreateJobObject(NULL, NULL));
   1023     if (!job_handle.IsValid()) {
   1024       LOG(ERROR) << "Could not create JobObject.";
   1025       return -1;
   1026     }
   1027 
   1028     // Allow break-away from job since sandbox and few other places rely on it
   1029     // on Windows versions prior to Windows 8 (which supports nested jobs).
   1030     // TODO(phajdan.jr): Do not allow break-away on Windows 8.
   1031     if (!SetJobObjectLimitFlags(job_handle.Get(),
   1032                                 JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE |
   1033                                 JOB_OBJECT_LIMIT_BREAKAWAY_OK)) {
   1034       LOG(ERROR) << "Could not SetJobObjectLimitFlags.";
   1035       return -1;
   1036     }
   1037 
   1038     new_options.job_handle = job_handle.Get();
   1039   }
   1040 #endif  // defined(OS_WIN)
   1041 
   1042 #if defined(OS_LINUX)
   1043   // To prevent accidental privilege sharing to an untrusted child, processes
   1044   // are started with PR_SET_NO_NEW_PRIVS. Do not set that here, since this
   1045   // new child will be privileged and trusted.
   1046   new_options.allow_new_privs = true;
   1047 #endif
   1048 
   1049   base::ProcessHandle process_handle;
   1050 
   1051   {
   1052     // Note how we grab the lock before the process possibly gets created.
   1053     // This ensures that when the lock is held, ALL the processes are registered
   1054     // in the set.
   1055     AutoLock lock(g_live_processes_lock.Get());
   1056 
   1057     if (!base::LaunchProcess(command_line, new_options, &process_handle))
   1058       return -1;
   1059 
   1060     g_live_processes.Get().insert(std::make_pair(process_handle, command_line));
   1061   }
   1062 
   1063   int exit_code = 0;
   1064   if (!base::WaitForExitCodeWithTimeout(process_handle,
   1065                                         &exit_code,
   1066                                         timeout)) {
   1067     *was_timeout = true;
   1068     exit_code = -1;  // Set a non-zero exit code to signal a failure.
   1069 
   1070     // Ensure that the process terminates.
   1071     base::KillProcess(process_handle, -1, true);
   1072   }
   1073 
   1074   {
   1075     // Note how we grab the log before issuing a possibly broad process kill.
   1076     // Other code parts that grab the log kill processes, so avoid trying
   1077     // to do that twice and trigger all kinds of log messages.
   1078     AutoLock lock(g_live_processes_lock.Get());
   1079 
   1080 #if defined(OS_POSIX)
   1081     if (exit_code != 0) {
   1082       // On POSIX, in case the test does not exit cleanly, either due to a crash
   1083       // or due to it timing out, we need to clean up any child processes that
   1084       // it might have created. On Windows, child processes are automatically
   1085       // cleaned up using JobObjects.
   1086       base::KillProcessGroup(process_handle);
   1087     }
   1088 #endif
   1089 
   1090     g_live_processes.Get().erase(process_handle);
   1091   }
   1092 
   1093   base::CloseProcessHandle(process_handle);
   1094 
   1095   return exit_code;
   1096 }
   1097 
   1098 }  // namespace base
   1099