Home | History | Annotate | Download | only in crash_reporter
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "kernel_collector.h"
     18 
     19 #include <map>
     20 #include <sys/stat.h>
     21 
     22 #include <base/files/file_util.h>
     23 #include <base/logging.h>
     24 #include <base/strings/string_util.h>
     25 #include <base/strings/stringprintf.h>
     26 
     27 using base::FilePath;
     28 using base::StringPrintf;
     29 
     30 namespace {
     31 
     32 const char kDefaultKernelStackSignature[] = "kernel-UnspecifiedStackSignature";
     33 const char kDumpParentPath[] = "/sys/fs";
     34 const char kDumpPath[] = "/sys/fs/pstore";
     35 const char kDumpFormat[] = "dmesg-ramoops-%zu";
     36 const char kKernelExecName[] = "kernel";
     37 // Maximum number of records to examine in the kDumpPath.
     38 const size_t kMaxDumpRecords = 100;
     39 const pid_t kKernelPid = 0;
     40 const char kKernelSignatureKey[] = "sig";
     41 // Byte length of maximum human readable portion of a kernel crash signature.
     42 const int kMaxHumanStringLength = 40;
     43 const uid_t kRootUid = 0;
     44 // Time in seconds from the final kernel log message for a call stack
     45 // to count towards the signature of the kcrash.
     46 const int kSignatureTimestampWindow = 2;
     47 // Kernel log timestamp regular expression.
     48 const char kTimestampRegex[] = "^<.*>\\[\\s*(\\d+\\.\\d+)\\]";
     49 
     50 //
     51 // These regular expressions enable to us capture the PC in a backtrace.
     52 // The backtrace is obtained through dmesg or the kernel's preserved/kcrashmem
     53 // feature.
     54 //
     55 // For ARM we see:
     56 //   "<5>[   39.458982] PC is at write_breakme+0xd0/0x1b4"
     57 // For MIPS we see:
     58 //   "<5>[ 3378.552000] epc   : 804010f0 lkdtm_do_action+0x68/0x3f8"
     59 // For x86:
     60 //   "<0>[   37.474699] EIP: [<790ed488>] write_breakme+0x80/0x108
     61 //    SS:ESP 0068:e9dd3efc"
     62 //
     63 const char* const kPCRegex[] = {
     64   0,
     65   " PC is at ([^\\+ ]+).*",
     66   " epc\\s+:\\s+\\S+\\s+([^\\+ ]+).*",  // MIPS has an exception program counter
     67   " EIP: \\[<.*>\\] ([^\\+ ]+).*",  // X86 uses EIP for the program counter
     68   " RIP  \\[<.*>\\] ([^\\+ ]+).*",  // X86_64 uses RIP for the program counter
     69 };
     70 
     71 static_assert(arraysize(kPCRegex) == KernelCollector::kArchCount,
     72               "Missing Arch PC regexp");
     73 
     74 }  // namespace
     75 
     76 KernelCollector::KernelCollector()
     77     : is_enabled_(false),
     78       ramoops_dump_path_(kDumpPath),
     79       records_(0),
     80       // We expect crash dumps in the format of architecture we are built for.
     81       arch_(GetCompilerArch()) {
     82 }
     83 
     84 KernelCollector::~KernelCollector() {
     85 }
     86 
     87 void KernelCollector::OverridePreservedDumpPath(const FilePath &file_path) {
     88   ramoops_dump_path_ = file_path;
     89 }
     90 
     91 bool KernelCollector::ReadRecordToString(std::string *contents,
     92                                          size_t current_record,
     93                                          bool *record_found) {
     94   // A record is a ramoops dump. It has an associated size of "record_size".
     95   std::string record;
     96   std::string captured;
     97 
     98   // Ramoops appends a header to a crash which contains ==== followed by a
     99   // timestamp. Ignore the header.
    100   pcrecpp::RE record_re(
    101       "====\\d+\\.\\d+\n(.*)",
    102       pcrecpp::RE_Options().set_multiline(true).set_dotall(true));
    103 
    104   pcrecpp::RE sanity_check_re("\n<\\d+>\\[\\s*(\\d+\\.\\d+)\\]");
    105 
    106   FilePath ramoops_record;
    107   GetRamoopsRecordPath(&ramoops_record, current_record);
    108   if (!base::ReadFileToString(ramoops_record, &record)) {
    109     LOG(ERROR) << "Unable to open " << ramoops_record.value();
    110     return false;
    111   }
    112 
    113   *record_found = false;
    114   if (record_re.FullMatch(record, &captured)) {
    115     // Found a ramoops header, so strip the header and append the rest.
    116     contents->append(captured);
    117     *record_found = true;
    118   } else if (sanity_check_re.PartialMatch(record.substr(0, 1024))) {
    119     // pstore compression has been added since kernel 3.12. In order to
    120     // decompress dmesg correctly, ramoops driver has to strip the header
    121     // before handing over the record to the pstore driver, so we don't
    122     // need to do it here anymore. However, the sanity check is needed because
    123     // sometimes a pstore record is just a chunk of uninitialized memory which
    124     // is not the result of a kernel crash. See crbug.com/443764
    125     contents->append(record);
    126     *record_found = true;
    127   } else {
    128     LOG(WARNING) << "Found invalid record at " << ramoops_record.value();
    129   }
    130 
    131   // Remove the record from pstore after it's found.
    132   if (*record_found)
    133     base::DeleteFile(ramoops_record, false);
    134 
    135   return true;
    136 }
    137 
    138 void KernelCollector::GetRamoopsRecordPath(FilePath *path,
    139                                            size_t record) {
    140   // Disable error "format not a string literal, argument types not checked"
    141   // because this is valid, but GNU apparently doesn't bother checking a const
    142   // format string.
    143   #pragma GCC diagnostic push
    144   #pragma GCC diagnostic ignored "-Wformat-nonliteral"
    145   *path = ramoops_dump_path_.Append(StringPrintf(kDumpFormat, record));
    146   #pragma GCC diagnostic pop
    147 }
    148 
    149 bool KernelCollector::LoadParameters() {
    150   // Discover how many ramoops records are being exported by the driver.
    151   size_t count;
    152 
    153   for (count = 0; count < kMaxDumpRecords; ++count) {
    154     FilePath ramoops_record;
    155     GetRamoopsRecordPath(&ramoops_record, count);
    156 
    157     if (!base::PathExists(ramoops_record))
    158       break;
    159   }
    160 
    161   records_ = count;
    162   return (records_ > 0);
    163 }
    164 
    165 bool KernelCollector::LoadPreservedDump(std::string *contents) {
    166   // Load dumps from the preserved memory and save them in contents.
    167   // Since the system is set to restart on oops we won't actually ever have
    168   // multiple records (only 0 or 1), but check in case we don't restart on
    169   // oops in the future.
    170   bool any_records_found = false;
    171   bool record_found = false;
    172   // clear contents since ReadFileToString actually appends to the string.
    173   contents->clear();
    174 
    175   for (size_t i = 0; i < records_; ++i) {
    176     if (!ReadRecordToString(contents, i, &record_found)) {
    177       break;
    178     }
    179     if (record_found) {
    180       any_records_found = true;
    181     }
    182   }
    183 
    184   if (!any_records_found) {
    185     LOG(ERROR) << "No valid records found in " << ramoops_dump_path_.value();
    186     return false;
    187   }
    188 
    189   return true;
    190 }
    191 
    192 void KernelCollector::StripSensitiveData(std::string *kernel_dump) {
    193   // Strip any data that the user might not want sent up to the crash servers.
    194   // We'll read in from kernel_dump and also place our output there.
    195   //
    196   // At the moment, the only sensitive data we strip is MAC addresses.
    197 
    198   // Get rid of things that look like MAC addresses, since they could possibly
    199   // give information about where someone has been.  This is strings that look
    200   // like this: 11:22:33:44:55:66
    201   // Complications:
    202   // - Within a given kernel_dump, want to be able to tell when the same MAC
    203   //   was used more than once.  Thus, we'll consistently replace the first
    204   //   MAC found with 00:00:00:00:00:01, the second with ...:02, etc.
    205   // - ACPI commands look like MAC addresses.  We'll specifically avoid getting
    206   //   rid of those.
    207   std::ostringstream result;
    208   std::string pre_mac_str;
    209   std::string mac_str;
    210   std::map<std::string, std::string> mac_map;
    211   pcrecpp::StringPiece input(*kernel_dump);
    212 
    213   // This RE will find the next MAC address and can return us the data preceding
    214   // the MAC and the MAC itself.
    215   pcrecpp::RE mac_re("(.*?)("
    216                      "[0-9a-fA-F][0-9a-fA-F]:"
    217                      "[0-9a-fA-F][0-9a-fA-F]:"
    218                      "[0-9a-fA-F][0-9a-fA-F]:"
    219                      "[0-9a-fA-F][0-9a-fA-F]:"
    220                      "[0-9a-fA-F][0-9a-fA-F]:"
    221                      "[0-9a-fA-F][0-9a-fA-F])",
    222                      pcrecpp::RE_Options()
    223                        .set_multiline(true)
    224                        .set_dotall(true));
    225 
    226   // This RE will identify when the 'pre_mac_str' shows that the MAC address
    227   // was really an ACPI cmd.  The full string looks like this:
    228   //   ata1.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES) filtered out
    229   pcrecpp::RE acpi_re("ACPI cmd ef/$",
    230                       pcrecpp::RE_Options()
    231                         .set_multiline(true)
    232                         .set_dotall(true));
    233 
    234   // Keep consuming, building up a result string as we go.
    235   while (mac_re.Consume(&input, &pre_mac_str, &mac_str)) {
    236     if (acpi_re.PartialMatch(pre_mac_str)) {
    237       // We really saw an ACPI command; add to result w/ no stripping.
    238       result << pre_mac_str << mac_str;
    239     } else {
    240       // Found a MAC address; look up in our hash for the mapping.
    241       std::string replacement_mac = mac_map[mac_str];
    242       if (replacement_mac == "") {
    243         // It wasn't present, so build up a replacement string.
    244         int mac_id = mac_map.size();
    245 
    246         // Handle up to 2^32 unique MAC address; overkill, but doesn't hurt.
    247         replacement_mac = StringPrintf("00:00:%02x:%02x:%02x:%02x",
    248                                        (mac_id & 0xff000000) >> 24,
    249                                        (mac_id & 0x00ff0000) >> 16,
    250                                        (mac_id & 0x0000ff00) >> 8,
    251                                        (mac_id & 0x000000ff));
    252         mac_map[mac_str] = replacement_mac;
    253       }
    254 
    255       // Dump the string before the MAC and the fake MAC address into result.
    256       result << pre_mac_str << replacement_mac;
    257     }
    258   }
    259 
    260   // One last bit of data might still be in the input.
    261   result << input;
    262 
    263   // We'll just assign right back to kernel_dump.
    264   *kernel_dump = result.str();
    265 }
    266 
    267 bool KernelCollector::DumpDirMounted() {
    268   struct stat st_parent;
    269   if (stat(kDumpParentPath, &st_parent)) {
    270     PLOG(WARNING) << "Could not stat " << kDumpParentPath;
    271     return false;
    272   }
    273 
    274   struct stat st_dump;
    275   if (stat(kDumpPath, &st_dump)) {
    276     PLOG(WARNING) << "Could not stat " << kDumpPath;
    277     return false;
    278   }
    279 
    280   if (st_parent.st_dev == st_dump.st_dev) {
    281     LOG(WARNING) << "Dump dir " << kDumpPath << " not mounted";
    282     return false;
    283   }
    284 
    285   return true;
    286 }
    287 
    288 bool KernelCollector::Enable() {
    289   if (arch_ == kArchUnknown || arch_ >= kArchCount ||
    290       kPCRegex[arch_] == nullptr) {
    291     LOG(WARNING) << "KernelCollector does not understand this architecture";
    292     return false;
    293   }
    294 
    295   if (!DumpDirMounted()) {
    296     LOG(WARNING) << "Kernel does not support crash dumping";
    297     return false;
    298   }
    299 
    300   // To enable crashes, we will eventually need to set
    301   // the chnv bit in BIOS, but it does not yet work.
    302   LOG(INFO) << "Enabling kernel crash handling";
    303   is_enabled_ = true;
    304   return true;
    305 }
    306 
    // Hash a string to a number.  We define our own hash function to not
    // be dependent on a C++ library that might change.  This function
    // uses basically the same approach as tr1/functional_hash.h but with
    // a larger prime number (16127 vs 131).
    //
    // Each byte is folded in as an unsigned value: the signedness of plain char
    // is implementation-defined, so adding it directly would make bytes >= 0x80
    // hash differently from one architecture to the next. ASCII input hashes the
    // same either way.
    //
    // Returns 0 for the empty string; arithmetic wraps modulo 2^N for unsigned.
    static unsigned HashString(const std::string &input) {
      unsigned hash = 0;
      for (const char c : input)
        hash = hash * 16127 + static_cast<unsigned char>(c);
      return hash;
    }
    317 
    318 void KernelCollector::ProcessStackTrace(
    319     pcrecpp::StringPiece kernel_dump,
    320     bool print_diagnostics,
    321     unsigned *hash,
    322     float *last_stack_timestamp,
    323     bool *is_watchdog_crash) {
    324   pcrecpp::RE line_re("(.+)", pcrecpp::MULTILINE());
    325   pcrecpp::RE stack_trace_start_re(std::string(kTimestampRegex) +
    326         " (Call Trace|Backtrace):$");
    327 
    328   // Match lines such as the following and grab out "function_name".
    329   // The ? may or may not be present.
    330   //
    331   // For ARM:
    332   // <4>[ 3498.731164] [<c0057220>] ? (function_name+0x20/0x2c) from
    333   // [<c018062c>] (foo_bar+0xdc/0x1bc)
    334   //
    335   // For MIPS:
    336   // <5>[ 3378.656000] [<804010f0>] lkdtm_do_action+0x68/0x3f8
    337   //
    338   // For X86:
    339   // <4>[ 6066.849504]  [<7937bcee>] ? function_name+0x66/0x6c
    340   //
    341   pcrecpp::RE stack_entry_re(std::string(kTimestampRegex) +
    342     "\\s+\\[<[[:xdigit:]]+>\\]"      // Matches "  [<7937bcee>]"
    343     "([\\s\\?(]+)"                   // Matches " ? (" (ARM) or " ? " (X86)
    344     "([^\\+ )]+)");                  // Matches until delimiter reached
    345   std::string line;
    346   std::string hashable;
    347   std::string previous_hashable;
    348   bool is_watchdog = false;
    349 
    350   *hash = 0;
    351   *last_stack_timestamp = 0;
    352 
    353   // Find the last and second-to-last stack traces.  The latter is used when
    354   // the panic is from a watchdog timeout.
    355   while (line_re.FindAndConsume(&kernel_dump, &line)) {
    356     std::string certainty;
    357     std::string function_name;
    358     if (stack_trace_start_re.PartialMatch(line, last_stack_timestamp)) {
    359       if (print_diagnostics) {
    360         printf("Stack trace starting.%s\n",
    361                hashable.empty() ? "" : "  Saving prior trace.");
    362       }
    363       previous_hashable = hashable;
    364       hashable.clear();
    365       is_watchdog = false;
    366     } else if (stack_entry_re.PartialMatch(line,
    367                                            last_stack_timestamp,
    368                                            &certainty,
    369                                            &function_name)) {
    370       bool is_certain = certainty.find('?') == std::string::npos;
    371       if (print_diagnostics) {
    372         printf("@%f: stack entry for %s (%s)\n",
    373                *last_stack_timestamp,
    374                function_name.c_str(),
    375                is_certain ? "certain" : "uncertain");
    376       }
    377       // Do not include any uncertain (prefixed by '?') frames in our hash.
    378       if (!is_certain)
    379         continue;
    380       if (!hashable.empty())
    381         hashable.append("|");
    382       if (function_name == "watchdog_timer_fn" ||
    383           function_name == "watchdog") {
    384         is_watchdog = true;
    385       }
    386       hashable.append(function_name);
    387     }
    388   }
    389 
    390   // If the last stack trace contains a watchdog function we assume the panic
    391   // is from the watchdog timer, and we hash the previous stack trace rather
    392   // than the last one, assuming that the previous stack is that of the hung
    393   // thread.
    394   //
    395   // In addition, if the hashable is empty (meaning all frames are uncertain,
    396   // for whatever reason) also use the previous frame, as it cannot be any
    397   // worse.
    398   if (is_watchdog || hashable.empty()) {
    399     hashable = previous_hashable;
    400   }
    401 
    402   *hash = HashString(hashable);
    403   *is_watchdog_crash = is_watchdog;
    404 
    405   if (print_diagnostics) {
    406     printf("Hash based on stack trace: \"%s\" at %f.\n",
    407            hashable.c_str(), *last_stack_timestamp);
    408   }
    409 }
    410 
    411 // static
    412 KernelCollector::ArchKind KernelCollector::GetCompilerArch() {
    413 #if defined(COMPILER_GCC) && defined(ARCH_CPU_ARM_FAMILY)
    414   return kArchArm;
    415 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_MIPS_FAMILY)
    416   return kArchMips;
    417 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_64)
    418   return kArchX86_64;
    419 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_FAMILY)
    420   return kArchX86;
    421 #else
    422   return kArchUnknown;
    423 #endif
    424 }
    425 
    426 bool KernelCollector::FindCrashingFunction(
    427   pcrecpp::StringPiece kernel_dump,
    428   bool print_diagnostics,
    429   float stack_trace_timestamp,
    430   std::string *crashing_function) {
    431   float timestamp = 0;
    432 
    433   // Use the correct regex for this architecture.
    434   pcrecpp::RE eip_re(std::string(kTimestampRegex) + kPCRegex[arch_],
    435                      pcrecpp::MULTILINE());
    436 
    437   while (eip_re.FindAndConsume(&kernel_dump, &timestamp, crashing_function)) {
    438     if (print_diagnostics) {
    439       printf("@%f: found crashing function %s\n",
    440              timestamp,
    441              crashing_function->c_str());
    442     }
    443   }
    444   if (timestamp == 0) {
    445     if (print_diagnostics) {
    446       printf("Found no crashing function.\n");
    447     }
    448     return false;
    449   }
    450   if (stack_trace_timestamp != 0 &&
    451       abs(static_cast<int>(stack_trace_timestamp - timestamp))
    452         > kSignatureTimestampWindow) {
    453     if (print_diagnostics) {
    454       printf("Found crashing function but not within window.\n");
    455     }
    456     return false;
    457   }
    458   if (print_diagnostics) {
    459     printf("Found crashing function %s\n", crashing_function->c_str());
    460   }
    461   return true;
    462 }
    463 
    464 bool KernelCollector::FindPanicMessage(pcrecpp::StringPiece kernel_dump,
    465                                        bool print_diagnostics,
    466                                        std::string *panic_message) {
    467   // Match lines such as the following and grab out "Fatal exception"
    468   // <0>[  342.841135] Kernel panic - not syncing: Fatal exception
    469   pcrecpp::RE kernel_panic_re(std::string(kTimestampRegex) +
    470                               " Kernel panic[^\\:]*\\:\\s*(.*)",
    471                               pcrecpp::MULTILINE());
    472   float timestamp = 0;
    473   while (kernel_panic_re.FindAndConsume(&kernel_dump,
    474                                         &timestamp,
    475                                         panic_message)) {
    476     if (print_diagnostics) {
    477       printf("@%f: panic message %s\n",
    478              timestamp,
    479              panic_message->c_str());
    480     }
    481   }
    482   if (timestamp == 0) {
    483     if (print_diagnostics) {
    484       printf("Found no panic message.\n");
    485     }
    486     return false;
    487   }
    488   return true;
    489 }
    490 
    491 bool KernelCollector::ComputeKernelStackSignature(
    492     const std::string &kernel_dump,
    493     std::string *kernel_signature,
    494     bool print_diagnostics) {
    495   unsigned stack_hash = 0;
    496   float last_stack_timestamp = 0;
    497   std::string human_string;
    498   bool is_watchdog_crash;
    499 
    500   ProcessStackTrace(kernel_dump,
    501                     print_diagnostics,
    502                     &stack_hash,
    503                     &last_stack_timestamp,
    504                     &is_watchdog_crash);
    505 
    506   if (!FindCrashingFunction(kernel_dump,
    507                             print_diagnostics,
    508                             last_stack_timestamp,
    509                             &human_string)) {
    510     if (!FindPanicMessage(kernel_dump, print_diagnostics, &human_string)) {
    511       if (print_diagnostics) {
    512         printf("Found no human readable string, using empty string.\n");
    513       }
    514       human_string.clear();
    515     }
    516   }
    517 
    518   if (human_string.empty() && stack_hash == 0) {
    519     if (print_diagnostics) {
    520       printf("Found neither a stack nor a human readable string, failing.\n");
    521     }
    522     return false;
    523   }
    524 
    525   human_string = human_string.substr(0, kMaxHumanStringLength);
    526   *kernel_signature = StringPrintf("%s-%s%s-%08X",
    527                                    kKernelExecName,
    528                                    (is_watchdog_crash ? "(HANG)-" : ""),
    529                                    human_string.c_str(),
    530                                    stack_hash);
    531   return true;
    532 }
    533 
    534 bool KernelCollector::Collect() {
    535   std::string kernel_dump;
    536   FilePath root_crash_directory;
    537 
    538   if (!LoadParameters()) {
    539     return false;
    540   }
    541   if (!LoadPreservedDump(&kernel_dump)) {
    542     return false;
    543   }
    544   StripSensitiveData(&kernel_dump);
    545   if (kernel_dump.empty()) {
    546     return false;
    547   }
    548   std::string signature;
    549   if (!ComputeKernelStackSignature(kernel_dump, &signature, false)) {
    550     signature = kDefaultKernelStackSignature;
    551   }
    552 
    553   std::string reason = "handling";
    554   bool feedback = true;
    555   if (IsDeveloperImage()) {
    556     reason = "developer build - always dumping";
    557     feedback = true;
    558   } else if (!is_feedback_allowed_function_()) {
    559     reason = "ignoring - no consent";
    560     feedback = false;
    561   }
    562 
    563   LOG(INFO) << "Received prior crash notification from "
    564             << "kernel (signature " << signature << ") (" << reason << ")";
    565 
    566   if (feedback) {
    567     count_crash_function_();
    568 
    569     if (!GetCreatedCrashDirectoryByEuid(kRootUid,
    570                                         &root_crash_directory,
    571                                         nullptr)) {
    572       return true;
    573     }
    574 
    575     std::string dump_basename =
    576         FormatDumpBasename(kKernelExecName, time(nullptr), kKernelPid);
    577     FilePath kernel_crash_path = root_crash_directory.Append(
    578         StringPrintf("%s.kcrash", dump_basename.c_str()));
    579 
    580     // We must use WriteNewFile instead of base::WriteFile as we
    581     // do not want to write with root access to a symlink that an attacker
    582     // might have created.
    583     if (WriteNewFile(kernel_crash_path,
    584                      kernel_dump.data(),
    585                      kernel_dump.length()) !=
    586         static_cast<int>(kernel_dump.length())) {
    587       LOG(INFO) << "Failed to write kernel dump to "
    588                 << kernel_crash_path.value().c_str();
    589       return true;
    590     }
    591 
    592     AddCrashMetaData(kKernelSignatureKey, signature);
    593     WriteCrashMetaData(
    594         root_crash_directory.Append(
    595             StringPrintf("%s.meta", dump_basename.c_str())),
    596         kKernelExecName,
    597         kernel_crash_path.value());
    598 
    599     LOG(INFO) << "Stored kcrash to " << kernel_crash_path.value();
    600   }
    601 
    602   return true;
    603 }
    604