1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "kernel_collector.h" 18 19 #include <map> 20 #include <sys/stat.h> 21 22 #include <base/files/file_util.h> 23 #include <base/logging.h> 24 #include <base/strings/string_util.h> 25 #include <base/strings/stringprintf.h> 26 27 using base::FilePath; 28 using base::StringPrintf; 29 30 namespace { 31 32 const char kDefaultKernelStackSignature[] = "kernel-UnspecifiedStackSignature"; 33 const char kDumpParentPath[] = "/sys/fs"; 34 const char kDumpPath[] = "/sys/fs/pstore"; 35 const char kDumpFormat[] = "dmesg-ramoops-%zu"; 36 const char kKernelExecName[] = "kernel"; 37 // Maximum number of records to examine in the kDumpPath. 38 const size_t kMaxDumpRecords = 100; 39 const pid_t kKernelPid = 0; 40 const char kKernelSignatureKey[] = "sig"; 41 // Byte length of maximum human readable portion of a kernel crash signature. 42 const int kMaxHumanStringLength = 40; 43 const uid_t kRootUid = 0; 44 // Time in seconds from the final kernel log message for a call stack 45 // to count towards the signature of the kcrash. 46 const int kSignatureTimestampWindow = 2; 47 // Kernel log timestamp regular expression. 48 const char kTimestampRegex[] = "^<.*>\\[\\s*(\\d+\\.\\d+)\\]"; 49 50 // 51 // These regular expressions enable to us capture the PC in a backtrace. 52 // The backtrace is obtained through dmesg or the kernel's preserved/kcrashmem 53 // feature. 54 // 55 // For ARM we see: 56 // "<5>[ 39.458982] PC is at write_breakme+0xd0/0x1b4" 57 // For MIPS we see: 58 // "<5>[ 3378.552000] epc : 804010f0 lkdtm_do_action+0x68/0x3f8" 59 // For x86: 60 // "<0>[ 37.474699] EIP: [<790ed488>] write_breakme+0x80/0x108 61 // SS:ESP 0068:e9dd3efc" 62 // 63 const char* const kPCRegex[] = { 64 0, 65 " PC is at ([^\\+ ]+).*", 66 " epc\\s+:\\s+\\S+\\s+([^\\+ ]+).*", // MIPS has an exception program counter 67 " EIP: \\[<.*>\\] ([^\\+ ]+).*", // X86 uses EIP for the program counter 68 " RIP \\[<.*>\\] ([^\\+ ]+).*", // X86_64 uses RIP for the program counter 69 }; 70 71 static_assert(arraysize(kPCRegex) == KernelCollector::kArchCount, 72 "Missing Arch PC regexp"); 73 74 } // namespace 75 76 KernelCollector::KernelCollector() 77 : is_enabled_(false), 78 ramoops_dump_path_(kDumpPath), 79 records_(0), 80 // We expect crash dumps in the format of architecture we are built for. 81 arch_(GetCompilerArch()) { 82 } 83 84 KernelCollector::~KernelCollector() { 85 } 86 87 void KernelCollector::OverridePreservedDumpPath(const FilePath &file_path) { 88 ramoops_dump_path_ = file_path; 89 } 90 91 bool KernelCollector::ReadRecordToString(std::string *contents, 92 size_t current_record, 93 bool *record_found) { 94 // A record is a ramoops dump. It has an associated size of "record_size". 95 std::string record; 96 std::string captured; 97 98 // Ramoops appends a header to a crash which contains ==== followed by a 99 // timestamp. Ignore the header. 100 pcrecpp::RE record_re( 101 "====\\d+\\.\\d+\n(.*)", 102 pcrecpp::RE_Options().set_multiline(true).set_dotall(true)); 103 104 pcrecpp::RE sanity_check_re("\n<\\d+>\\[\\s*(\\d+\\.\\d+)\\]"); 105 106 FilePath ramoops_record; 107 GetRamoopsRecordPath(&ramoops_record, current_record); 108 if (!base::ReadFileToString(ramoops_record, &record)) { 109 LOG(ERROR) << "Unable to open " << ramoops_record.value(); 110 return false; 111 } 112 113 *record_found = false; 114 if (record_re.FullMatch(record, &captured)) { 115 // Found a ramoops header, so strip the header and append the rest. 116 contents->append(captured); 117 *record_found = true; 118 } else if (sanity_check_re.PartialMatch(record.substr(0, 1024))) { 119 // pstore compression has been added since kernel 3.12. In order to 120 // decompress dmesg correctly, ramoops driver has to strip the header 121 // before handing over the record to the pstore driver, so we don't 122 // need to do it here anymore. However, the sanity check is needed because 123 // sometimes a pstore record is just a chunk of uninitialized memory which 124 // is not the result of a kernel crash. See crbug.com/443764 125 contents->append(record); 126 *record_found = true; 127 } else { 128 LOG(WARNING) << "Found invalid record at " << ramoops_record.value(); 129 } 130 131 // Remove the record from pstore after it's found. 132 if (*record_found) 133 base::DeleteFile(ramoops_record, false); 134 135 return true; 136 } 137 138 void KernelCollector::GetRamoopsRecordPath(FilePath *path, 139 size_t record) { 140 // Disable error "format not a string literal, argument types not checked" 141 // because this is valid, but GNU apparently doesn't bother checking a const 142 // format string. 143 #pragma GCC diagnostic push 144 #pragma GCC diagnostic ignored "-Wformat-nonliteral" 145 *path = ramoops_dump_path_.Append(StringPrintf(kDumpFormat, record)); 146 #pragma GCC diagnostic pop 147 } 148 149 bool KernelCollector::LoadParameters() { 150 // Discover how many ramoops records are being exported by the driver. 151 size_t count; 152 153 for (count = 0; count < kMaxDumpRecords; ++count) { 154 FilePath ramoops_record; 155 GetRamoopsRecordPath(&ramoops_record, count); 156 157 if (!base::PathExists(ramoops_record)) 158 break; 159 } 160 161 records_ = count; 162 return (records_ > 0); 163 } 164 165 bool KernelCollector::LoadPreservedDump(std::string *contents) { 166 // Load dumps from the preserved memory and save them in contents. 167 // Since the system is set to restart on oops we won't actually ever have 168 // multiple records (only 0 or 1), but check in case we don't restart on 169 // oops in the future. 170 bool any_records_found = false; 171 bool record_found = false; 172 // clear contents since ReadFileToString actually appends to the string. 173 contents->clear(); 174 175 for (size_t i = 0; i < records_; ++i) { 176 if (!ReadRecordToString(contents, i, &record_found)) { 177 break; 178 } 179 if (record_found) { 180 any_records_found = true; 181 } 182 } 183 184 if (!any_records_found) { 185 LOG(ERROR) << "No valid records found in " << ramoops_dump_path_.value(); 186 return false; 187 } 188 189 return true; 190 } 191 192 void KernelCollector::StripSensitiveData(std::string *kernel_dump) { 193 // Strip any data that the user might not want sent up to the crash servers. 194 // We'll read in from kernel_dump and also place our output there. 195 // 196 // At the moment, the only sensitive data we strip is MAC addresses. 197 198 // Get rid of things that look like MAC addresses, since they could possibly 199 // give information about where someone has been. This is strings that look 200 // like this: 11:22:33:44:55:66 201 // Complications: 202 // - Within a given kernel_dump, want to be able to tell when the same MAC 203 // was used more than once. Thus, we'll consistently replace the first 204 // MAC found with 00:00:00:00:00:01, the second with ...:02, etc. 205 // - ACPI commands look like MAC addresses. We'll specifically avoid getting 206 // rid of those. 207 std::ostringstream result; 208 std::string pre_mac_str; 209 std::string mac_str; 210 std::map<std::string, std::string> mac_map; 211 pcrecpp::StringPiece input(*kernel_dump); 212 213 // This RE will find the next MAC address and can return us the data preceding 214 // the MAC and the MAC itself. 215 pcrecpp::RE mac_re("(.*?)(" 216 "[0-9a-fA-F][0-9a-fA-F]:" 217 "[0-9a-fA-F][0-9a-fA-F]:" 218 "[0-9a-fA-F][0-9a-fA-F]:" 219 "[0-9a-fA-F][0-9a-fA-F]:" 220 "[0-9a-fA-F][0-9a-fA-F]:" 221 "[0-9a-fA-F][0-9a-fA-F])", 222 pcrecpp::RE_Options() 223 .set_multiline(true) 224 .set_dotall(true)); 225 226 // This RE will identify when the 'pre_mac_str' shows that the MAC address 227 // was really an ACPI cmd. The full string looks like this: 228 // ata1.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES) filtered out 229 pcrecpp::RE acpi_re("ACPI cmd ef/$", 230 pcrecpp::RE_Options() 231 .set_multiline(true) 232 .set_dotall(true)); 233 234 // Keep consuming, building up a result string as we go. 235 while (mac_re.Consume(&input, &pre_mac_str, &mac_str)) { 236 if (acpi_re.PartialMatch(pre_mac_str)) { 237 // We really saw an ACPI command; add to result w/ no stripping. 238 result << pre_mac_str << mac_str; 239 } else { 240 // Found a MAC address; look up in our hash for the mapping. 241 std::string replacement_mac = mac_map[mac_str]; 242 if (replacement_mac == "") { 243 // It wasn't present, so build up a replacement string. 244 int mac_id = mac_map.size(); 245 246 // Handle up to 2^32 unique MAC address; overkill, but doesn't hurt. 247 replacement_mac = StringPrintf("00:00:%02x:%02x:%02x:%02x", 248 (mac_id & 0xff000000) >> 24, 249 (mac_id & 0x00ff0000) >> 16, 250 (mac_id & 0x0000ff00) >> 8, 251 (mac_id & 0x000000ff)); 252 mac_map[mac_str] = replacement_mac; 253 } 254 255 // Dump the string before the MAC and the fake MAC address into result. 256 result << pre_mac_str << replacement_mac; 257 } 258 } 259 260 // One last bit of data might still be in the input. 261 result << input; 262 263 // We'll just assign right back to kernel_dump. 264 *kernel_dump = result.str(); 265 } 266 267 bool KernelCollector::DumpDirMounted() { 268 struct stat st_parent; 269 if (stat(kDumpParentPath, &st_parent)) { 270 PLOG(WARNING) << "Could not stat " << kDumpParentPath; 271 return false; 272 } 273 274 struct stat st_dump; 275 if (stat(kDumpPath, &st_dump)) { 276 PLOG(WARNING) << "Could not stat " << kDumpPath; 277 return false; 278 } 279 280 if (st_parent.st_dev == st_dump.st_dev) { 281 LOG(WARNING) << "Dump dir " << kDumpPath << " not mounted"; 282 return false; 283 } 284 285 return true; 286 } 287 288 bool KernelCollector::Enable() { 289 if (arch_ == kArchUnknown || arch_ >= kArchCount || 290 kPCRegex[arch_] == nullptr) { 291 LOG(WARNING) << "KernelCollector does not understand this architecture"; 292 return false; 293 } 294 295 if (!DumpDirMounted()) { 296 LOG(WARNING) << "Kernel does not support crash dumping"; 297 return false; 298 } 299 300 // To enable crashes, we will eventually need to set 301 // the chnv bit in BIOS, but it does not yet work. 302 LOG(INFO) << "Enabling kernel crash handling"; 303 is_enabled_ = true; 304 return true; 305 } 306 307 // Hash a string to a number. We define our own hash function to not 308 // be dependent on a C++ library that might change. This function 309 // uses basically the same approach as tr1/functional_hash.h but with 310 // a larger prime number (16127 vs 131). 311 static unsigned HashString(const std::string &input) { 312 unsigned hash = 0; 313 for (size_t i = 0; i < input.length(); ++i) 314 hash = hash * 16127 + input[i]; 315 return hash; 316 } 317 318 void KernelCollector::ProcessStackTrace( 319 pcrecpp::StringPiece kernel_dump, 320 bool print_diagnostics, 321 unsigned *hash, 322 float *last_stack_timestamp, 323 bool *is_watchdog_crash) { 324 pcrecpp::RE line_re("(.+)", pcrecpp::MULTILINE()); 325 pcrecpp::RE stack_trace_start_re(std::string(kTimestampRegex) + 326 " (Call Trace|Backtrace):$"); 327 328 // Match lines such as the following and grab out "function_name". 329 // The ? may or may not be present. 330 // 331 // For ARM: 332 // <4>[ 3498.731164] [<c0057220>] ? (function_name+0x20/0x2c) from 333 // [<c018062c>] (foo_bar+0xdc/0x1bc) 334 // 335 // For MIPS: 336 // <5>[ 3378.656000] [<804010f0>] lkdtm_do_action+0x68/0x3f8 337 // 338 // For X86: 339 // <4>[ 6066.849504] [<7937bcee>] ? function_name+0x66/0x6c 340 // 341 pcrecpp::RE stack_entry_re(std::string(kTimestampRegex) + 342 "\\s+\\[<[[:xdigit:]]+>\\]" // Matches " [<7937bcee>]" 343 "([\\s\\?(]+)" // Matches " ? (" (ARM) or " ? " (X86) 344 "([^\\+ )]+)"); // Matches until delimiter reached 345 std::string line; 346 std::string hashable; 347 std::string previous_hashable; 348 bool is_watchdog = false; 349 350 *hash = 0; 351 *last_stack_timestamp = 0; 352 353 // Find the last and second-to-last stack traces. The latter is used when 354 // the panic is from a watchdog timeout. 355 while (line_re.FindAndConsume(&kernel_dump, &line)) { 356 std::string certainty; 357 std::string function_name; 358 if (stack_trace_start_re.PartialMatch(line, last_stack_timestamp)) { 359 if (print_diagnostics) { 360 printf("Stack trace starting.%s\n", 361 hashable.empty() ? "" : " Saving prior trace."); 362 } 363 previous_hashable = hashable; 364 hashable.clear(); 365 is_watchdog = false; 366 } else if (stack_entry_re.PartialMatch(line, 367 last_stack_timestamp, 368 &certainty, 369 &function_name)) { 370 bool is_certain = certainty.find('?') == std::string::npos; 371 if (print_diagnostics) { 372 printf("@%f: stack entry for %s (%s)\n", 373 *last_stack_timestamp, 374 function_name.c_str(), 375 is_certain ? "certain" : "uncertain"); 376 } 377 // Do not include any uncertain (prefixed by '?') frames in our hash. 378 if (!is_certain) 379 continue; 380 if (!hashable.empty()) 381 hashable.append("|"); 382 if (function_name == "watchdog_timer_fn" || 383 function_name == "watchdog") { 384 is_watchdog = true; 385 } 386 hashable.append(function_name); 387 } 388 } 389 390 // If the last stack trace contains a watchdog function we assume the panic 391 // is from the watchdog timer, and we hash the previous stack trace rather 392 // than the last one, assuming that the previous stack is that of the hung 393 // thread. 394 // 395 // In addition, if the hashable is empty (meaning all frames are uncertain, 396 // for whatever reason) also use the previous frame, as it cannot be any 397 // worse. 398 if (is_watchdog || hashable.empty()) { 399 hashable = previous_hashable; 400 } 401 402 *hash = HashString(hashable); 403 *is_watchdog_crash = is_watchdog; 404 405 if (print_diagnostics) { 406 printf("Hash based on stack trace: \"%s\" at %f.\n", 407 hashable.c_str(), *last_stack_timestamp); 408 } 409 } 410 411 // static 412 KernelCollector::ArchKind KernelCollector::GetCompilerArch() { 413 #if defined(COMPILER_GCC) && defined(ARCH_CPU_ARM_FAMILY) 414 return kArchArm; 415 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_MIPS_FAMILY) 416 return kArchMips; 417 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_64) 418 return kArchX86_64; 419 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_FAMILY) 420 return kArchX86; 421 #else 422 return kArchUnknown; 423 #endif 424 } 425 426 bool KernelCollector::FindCrashingFunction( 427 pcrecpp::StringPiece kernel_dump, 428 bool print_diagnostics, 429 float stack_trace_timestamp, 430 std::string *crashing_function) { 431 float timestamp = 0; 432 433 // Use the correct regex for this architecture. 434 pcrecpp::RE eip_re(std::string(kTimestampRegex) + kPCRegex[arch_], 435 pcrecpp::MULTILINE()); 436 437 while (eip_re.FindAndConsume(&kernel_dump, ×tamp, crashing_function)) { 438 if (print_diagnostics) { 439 printf("@%f: found crashing function %s\n", 440 timestamp, 441 crashing_function->c_str()); 442 } 443 } 444 if (timestamp == 0) { 445 if (print_diagnostics) { 446 printf("Found no crashing function.\n"); 447 } 448 return false; 449 } 450 if (stack_trace_timestamp != 0 && 451 abs(static_cast<int>(stack_trace_timestamp - timestamp)) 452 > kSignatureTimestampWindow) { 453 if (print_diagnostics) { 454 printf("Found crashing function but not within window.\n"); 455 } 456 return false; 457 } 458 if (print_diagnostics) { 459 printf("Found crashing function %s\n", crashing_function->c_str()); 460 } 461 return true; 462 } 463 464 bool KernelCollector::FindPanicMessage(pcrecpp::StringPiece kernel_dump, 465 bool print_diagnostics, 466 std::string *panic_message) { 467 // Match lines such as the following and grab out "Fatal exception" 468 // <0>[ 342.841135] Kernel panic - not syncing: Fatal exception 469 pcrecpp::RE kernel_panic_re(std::string(kTimestampRegex) + 470 " Kernel panic[^\\:]*\\:\\s*(.*)", 471 pcrecpp::MULTILINE()); 472 float timestamp = 0; 473 while (kernel_panic_re.FindAndConsume(&kernel_dump, 474 ×tamp, 475 panic_message)) { 476 if (print_diagnostics) { 477 printf("@%f: panic message %s\n", 478 timestamp, 479 panic_message->c_str()); 480 } 481 } 482 if (timestamp == 0) { 483 if (print_diagnostics) { 484 printf("Found no panic message.\n"); 485 } 486 return false; 487 } 488 return true; 489 } 490 491 bool KernelCollector::ComputeKernelStackSignature( 492 const std::string &kernel_dump, 493 std::string *kernel_signature, 494 bool print_diagnostics) { 495 unsigned stack_hash = 0; 496 float last_stack_timestamp = 0; 497 std::string human_string; 498 bool is_watchdog_crash; 499 500 ProcessStackTrace(kernel_dump, 501 print_diagnostics, 502 &stack_hash, 503 &last_stack_timestamp, 504 &is_watchdog_crash); 505 506 if (!FindCrashingFunction(kernel_dump, 507 print_diagnostics, 508 last_stack_timestamp, 509 &human_string)) { 510 if (!FindPanicMessage(kernel_dump, print_diagnostics, &human_string)) { 511 if (print_diagnostics) { 512 printf("Found no human readable string, using empty string.\n"); 513 } 514 human_string.clear(); 515 } 516 } 517 518 if (human_string.empty() && stack_hash == 0) { 519 if (print_diagnostics) { 520 printf("Found neither a stack nor a human readable string, failing.\n"); 521 } 522 return false; 523 } 524 525 human_string = human_string.substr(0, kMaxHumanStringLength); 526 *kernel_signature = StringPrintf("%s-%s%s-%08X", 527 kKernelExecName, 528 (is_watchdog_crash ? "(HANG)-" : ""), 529 human_string.c_str(), 530 stack_hash); 531 return true; 532 } 533 534 bool KernelCollector::Collect() { 535 std::string kernel_dump; 536 FilePath root_crash_directory; 537 538 if (!LoadParameters()) { 539 return false; 540 } 541 if (!LoadPreservedDump(&kernel_dump)) { 542 return false; 543 } 544 StripSensitiveData(&kernel_dump); 545 if (kernel_dump.empty()) { 546 return false; 547 } 548 std::string signature; 549 if (!ComputeKernelStackSignature(kernel_dump, &signature, false)) { 550 signature = kDefaultKernelStackSignature; 551 } 552 553 std::string reason = "handling"; 554 bool feedback = true; 555 if (IsDeveloperImage()) { 556 reason = "developer build - always dumping"; 557 feedback = true; 558 } else if (!is_feedback_allowed_function_()) { 559 reason = "ignoring - no consent"; 560 feedback = false; 561 } 562 563 LOG(INFO) << "Received prior crash notification from " 564 << "kernel (signature " << signature << ") (" << reason << ")"; 565 566 if (feedback) { 567 count_crash_function_(); 568 569 if (!GetCreatedCrashDirectoryByEuid(kRootUid, 570 &root_crash_directory, 571 nullptr)) { 572 return true; 573 } 574 575 std::string dump_basename = 576 FormatDumpBasename(kKernelExecName, time(nullptr), kKernelPid); 577 FilePath kernel_crash_path = root_crash_directory.Append( 578 StringPrintf("%s.kcrash", dump_basename.c_str())); 579 580 // We must use WriteNewFile instead of base::WriteFile as we 581 // do not want to write with root access to a symlink that an attacker 582 // might have created. 583 if (WriteNewFile(kernel_crash_path, 584 kernel_dump.data(), 585 kernel_dump.length()) != 586 static_cast<int>(kernel_dump.length())) { 587 LOG(INFO) << "Failed to write kernel dump to " 588 << kernel_crash_path.value().c_str(); 589 return true; 590 } 591 592 AddCrashMetaData(kKernelSignatureKey, signature); 593 WriteCrashMetaData( 594 root_crash_directory.Append( 595 StringPrintf("%s.meta", dump_basename.c_str())), 596 kKernelExecName, 597 kernel_crash_path.value()); 598 599 LOG(INFO) << "Stored kcrash to " << kernel_crash_path.value(); 600 } 601 602 return true; 603 } 604