/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>
#include <libgen.h>
#include <signal.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
#include <time.h>
#include <unistd.h>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include <android-base/logging.h>
#include <android-base/file.h>
#include <android-base/parsedouble.h>
#include <android-base/parseint.h>
#include <android-base/strings.h>
#include <android-base/test_utils.h>
#if defined(__ANDROID__)
#include <android-base/properties.h>
#endif

#include "CallChainJoiner.h"
#include "command.h"
#include "environment.h"
#include "event_selection_set.h"
#include "event_type.h"
#include "IOEventLoop.h"
#include "OfflineUnwinder.h"
#include "perf_clock.h"
#include "read_apk.h"
#include "read_elf.h"
#include "record.h"
#include "record_file.h"
#include "thread_tree.h"
#include "tracing.h"
#include "utils.h"
#include "workload.h"

using namespace simpleperf;

// Event type used when the user doesn't select any event with -e/--group.
static std::string default_measured_event_type = "cpu-cycles";

// Maps branch filter names accepted by -j/-b to PERF_SAMPLE_BRANCH_* bits.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps --clockid argument values to kernel clockid constants ("perf" is
// handled separately and means: don't set a clockid at all).
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;

// Implements `simpleperf record`: opens perf events for the selected targets,
// streams sample records from the kernel's mapped buffers into perf.data, and
// post-processes them (dwarf unwinding, callchain joining, feature dumping).
class RecordCommand : public Command {
 public:
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
"Usage: simpleperf record [options] [--] [command [command-args]]\n"
"       Gather sampling information of running [command]. And -a/-p/-t option\n"
"       can be used to change target of sampling information.\n"
"       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
"Select monitored threads:\n"
"-a     System-wide collection.\n"
#if defined(__ANDROID__)
"--app package_name    Profile the process of an Android application.\n"
"                      On non-rooted devices, the app must be debuggable,\n"
"                      because we use run-as to switch to the app's context.\n"
#endif
"-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
"                       with -a.\n"
"-t tid1,tid2,...       Record events on existing threads. Mutually exclusive with -a.\n"
"\n"
"Select monitored event types:\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
"             Select the event list to sample. Use `simpleperf list` to find\n"
"             all possible event names. Modifiers can be added to define how\n"
"             the event should be monitored.\n"
"             Possible modifiers are:\n"
"                u - monitor user space events only\n"
"                k - monitor kernel space events only\n"
"--group event1[:modifier],event2[:modifier2],...\n"
"             Similar to -e option. But events specified in the same --group\n"
"             option are monitored as a group, and scheduled in and out at the\n"
"             same time.\n"
"--trace-offcpu   Generate samples when threads are scheduled off cpu.\n"
"                 Similar to \"-c 1 -e sched:sched_switch\".\n"
"\n"
"Select monitoring options:\n"
"-f freq      Set event sample frequency. It means recording at most [freq]\n"
"             samples every second. For non-tracepoint events, the default\n"
"             option is -f 4000. A -f/-c option affects all event types\n"
"             following it until meeting another -f/-c option. For example,\n"
"             for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
"             has sample freq 1000, sched:sched_switch event has sample period 1.\n"
"-c count     Set event sample period. It means recording one sample when\n"
"             [count] events happen. For tracepoint events, the default option\n"
"             is -c 1.\n"
"--call-graph fp | dwarf[,<dump_stack_size>]\n"
"             Enable call graph recording. Use frame pointer or dwarf debug\n"
"             frame as the method to parse call graph in stack.\n"
"             Default is dwarf,65528.\n"
"-g           Same as '--call-graph dwarf'.\n"
"--clockid clock_id      Generate timestamps of samples using selected clock.\n"
"                        Possible values are: realtime, monotonic,\n"
"                        monotonic_raw, boottime, perf. Default is perf.\n"
"--cpu cpu_item1,cpu_item2,...\n"
"             Collect samples only on the selected cpus. cpu_item can be cpu\n"
"             number like 1, or cpu range like 0-3.\n"
"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
"                        [command]. Here time_in_sec may be any positive\n"
"                        floating point number.\n"
"-j branch_filter1,branch_filter2,...\n"
"             Enable taken branch stack sampling. Each sample captures a series\n"
"             of consecutive taken branches.\n"
"             The following filters are defined:\n"
"                any: any type of branch\n"
"                any_call: any function call or system call\n"
"                any_ret: any function return or system call return\n"
"                ind_call: any indirect branch\n"
"                u: only when the branch target is at the user level\n"
"                k: only when the branch target is in the kernel\n"
"             This option requires at least one branch type among any, any_call,\n"
"             any_ret, ind_call.\n"
"-b           Enable taken branch stack sampling. Same as '-j any'.\n"
"-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
"                the kernel. It should be a power of 2. If not set, the max\n"
"                possible value <= 1024 will be used.\n"
"--no-inherit  Don't record created child threads/processes.\n"
"\n"
"Dwarf unwinding options:\n"
"--no-post-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
"                   will be recorded in perf.data and unwound after recording.\n"
"                   However, this takes a lot of disk space. Use this option to\n"
"                   unwind while recording.\n"
"--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
"              will be unwound by default. Use this option to disable the\n"
"              unwinding of the user's stack.\n"
"--no-callchain-joiner  If `--call-graph dwarf` option is used, then by default\n"
"                       callchain joiner is used to break the 64k stack limit\n"
"                       and build more complete call graphs. However, the built\n"
"                       call graphs may not be correct in all cases.\n"
"--callchain-joiner-min-matching-nodes count\n"
"               When callchain joiner is used, set the matched nodes needed to join\n"
"               callchains. The count should be >= 1. By default it is 1.\n"
"\n"
"Recording file options:\n"
"--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
"                          kernel symbols will be dumped when needed.\n"
"--no-dump-symbols       Don't dump symbols in perf.data. By default symbols are\n"
"                        dumped in perf.data, to support reporting in another\n"
"                        environment.\n"
"-o record_file_name    Set record file name, default is perf.data.\n"
"--exit-with-parent            Stop recording when the process starting\n"
"                              simpleperf dies.\n"
"--start_profiling_fd fd_no    After starting profiling, write \"STARTED\" to\n"
"                              <fd_no>, then close <fd_no>.\n"
"--symfs <dir>    Look for files with symbols relative to this directory.\n"
"                 This option is used to provide files with symbol table and\n"
"                 debug information, which are used for unwinding and dumping symbols.\n"
#if 0
// Below options are only used internally and shouldn't be visible to the public.
"--in-app         We are already running in the app's context.\n"
"--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
#endif
            // clang-format on
            ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(true),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        clockid_("perf"),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        start_sampling_time_in_ns_(0),
        sample_record_count_(0),
        lost_record_count_(0),
        start_profiling_fd_(-1),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u) {
    // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
    // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
    app_package_name_ = GetDefaultAppPackageName();
  }

  bool Run(const std::vector<std::string>& args);

 private:
  // Parses command-line options; non-option trailing args (the workload
  // command) are returned through non_option_args.
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* non_option_args);
  // The three phases of Run(): set up events/targets/output file, run the
  // event loop while the workload executes, then finalize perf.data.
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(
      const std::string& filename);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
  bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
  // Record sinks: every record read from the kernel flows through
  // ProcessRecord into one of the Save* paths depending on unwind options.
  bool ProcessRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);

  void UpdateRecordForEmbeddedElfPath(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature();
  void CollectHitFileInfo(const SampleRecord& r);

  std::unique_ptr<SampleSpeed> sample_speed_;   // last -f/-c value; null until seen
  bool system_wide_collection_;                 // -a
  uint64_t branch_sampling_;                    // OR of PERF_SAMPLE_BRANCH_* bits (-j/-b)
  bool fp_callchain_sampling_;                  // --call-graph fp
  bool dwarf_callchain_sampling_;               // --call-graph dwarf / -g
  uint32_t dump_stack_size_in_dwarf_sampling_;
  bool unwind_dwarf_callchain_;                 // cleared by --no-unwind
  bool post_unwind_;                            // cleared by --no-post-unwind
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  bool child_inherit_;                          // cleared by --no-inherit
  double duration_in_sec_;                      // --duration; 0 means unlimited
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  std::string clockid_;                         // --clockid; "perf" means kernel default
  std::vector<int> cpus_;                       // --cpu; empty means all cpus
  EventSelectionSet event_selection_set_;

  std::pair<size_t, size_t> mmap_page_range_;   // min/max pages tried for the mapped buffer

  ThreadTree thread_tree_;
  std::string record_filename_;                 // -o
  std::unique_ptr<RecordFileWriter> record_file_writer_;

  uint64_t start_sampling_time_in_ns_;  // nanoseconds from machine starting

  uint64_t sample_record_count_;
  uint64_t lost_record_count_;
  int start_profiling_fd_;                      // --start_profiling_fd; -1 if unused
  std::string app_package_name_;                // --app
  bool in_app_context_;                         // --in-app (internal)
  bool trace_offcpu_;                           // --trace-offcpu
  bool exclude_kernel_callchain_;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
};

bool RecordCommand::Run(const std::vector<std::string>& args) {
  ScopedCurrentArch scoped_arch(GetMachineArch());
  if (!CheckPerfEventLimit()) {
    return false;
  }
  AllowMoreOpenedFiles();

  std::vector<std::string> workload_args;
  if (!ParseOptions(args, &workload_args)) {
    return false;
  }
  ScopedTempFiles scoped_temp_files(android::base::Dirname(record_filename_));
  if (!app_package_name_.empty() && !in_app_context_) {
    // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
    // it will be impossible when using --app. So don't switch to app's context when we are
    // root.
    if (!IsRoot()) {
      // Re-executes this record command inside the app's context via run-as.
      return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
                             record_filename_, true);
    }
  }
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return false;
    }
  }
  if (!PrepareRecording(workload.get())) {
    return false;
  }
  if (!DoRecording(workload.get())) {
    return false;
  }
  return PostProcessRecording(args);
}

bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  if (!InitPerfClock()) {
    return false;
  }
  PrepareVdsoFile();
  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    size_t group_id;
    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
      return false;
    }
    if (sample_speed_) {
      event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    offline_unwinder_.reset(new OfflineUnwinder(false));
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_,
                                                false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    // tid -1 means monitoring all threads in perf_event_open().
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableOnExec(true);
      if (event_selection_set_.HasInplaceSampler()) {
        // Start worker early, because the worker process has to setup inplace-sampler server
        // before we try to connect it.
        if (!workload->Start()) {
          return false;
        }
      }
    } else if (!app_package_name_.empty()) {
      // If app process is not created, wait for it. This allows simpleperf starts before
      // app process. In this way, we can have a better support of app start-up time profiling.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
                                           mmap_page_range_.second)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  auto callback =
      std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  // Stop the event loop (and thus recording) on child exit or termination signals.
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM},
                             [loop]() { return loop->ExitLoop(); })) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }

  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }
  return true;
}

bool RecordCommand::DoRecording(Workload* workload) {
  // Write records in mapped buffers of perf_event_files to output file while workload is running.
  start_sampling_time_in_ns_ = GetPerfClock();
  LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns";
  // Start the workload now unless it was already started in PrepareRecording()
  // (the inplace-sampler case).
  if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
    return false;
  }
  if (start_profiling_fd_ != -1) {
    // Notify the controlling process (e.g. an IDE) that profiling has begun.
    if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
      PLOG(ERROR) << "failed to write to start_profiling_fd_";
    }
    close(start_profiling_fd_);
  }
  if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
    return false;
  }
  // Drain any records still sitting in the kernel's mapped buffers.
  if (!event_selection_set_.FinishReadMmapEventData()) {
    return false;
  }
  return true;
}

bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 2. Optionally join Callchains.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 3. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }

  // 4. Show brief record result.
  LOG(INFO) << "Samples recorded: " << sample_record_count_
            << ". Samples lost: " << lost_record_count_ << ".";
  if (sample_record_count_ + lost_record_count_ != 0) {
    double lost_percent = static_cast<double>(lost_record_count_) /
                          (lost_record_count_ + sample_record_count_);
    constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
    if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
      LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                   << "consider increasing mmap_pages(-m), "
                   << "or decreasing sample frequency(-f), "
                   << "or increasing sample period(-c).";
    }
  }
  if (callchain_joiner_) {
    callchain_joiner_->DumpStat();
  }
  return true;
}

bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
                                 std::vector<std::string>* non_option_args) {
  // Event groups added before any -f/-c option; their sample speed is filled
  // in once the first -f/-c is seen.
  std::vector<size_t> wait_setting_speed_event_groups_;
  size_t i;
  for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
    if (args[i] == "-a") {
      system_wide_collection_ = true;
    } else if (args[i] == "--app") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      app_package_name_ = args[i];
    } else if (args[i] == "-b") {
      branch_sampling_ = branch_sampling_type_map["any"];
    } else if (args[i] == "-c" || args[i] == "-f") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      char* endptr;
      uint64_t value = strtoull(args[i].c_str(), &endptr, 0);
      if (*endptr != '\0' || value == 0) {
        LOG(ERROR) << "Invalid option for " << args[i-1] << ": '" << args[i] << "'";
        return false;
      }
      // SampleSpeed(freq, period): -c sets a period, -f sets a frequency.
      if (args[i-1] == "-c") {
        sample_speed_.reset(new SampleSpeed(0, value));
      } else {
        sample_speed_.reset(new SampleSpeed(value, 0));
      }
      // Apply the new speed to event groups that were added before it.
      for (auto group_id : wait_setting_speed_event_groups_) {
        event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
      }
      wait_setting_speed_event_groups_.clear();

    } else if (args[i] == "--call-graph") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
554 std::vector<std::string> strs = android::base::Split(args[i], ","); 555 if (strs[0] == "fp") { 556 fp_callchain_sampling_ = true; 557 dwarf_callchain_sampling_ = false; 558 } else if (strs[0] == "dwarf") { 559 fp_callchain_sampling_ = false; 560 dwarf_callchain_sampling_ = true; 561 if (strs.size() > 1) { 562 char* endptr; 563 uint64_t size = strtoull(strs[1].c_str(), &endptr, 0); 564 if (*endptr != '\0' || size > UINT_MAX) { 565 LOG(ERROR) << "invalid dump stack size in --call-graph option: " 566 << strs[1]; 567 return false; 568 } 569 if ((size & 7) != 0) { 570 LOG(ERROR) << "dump stack size " << size 571 << " is not 8-byte aligned."; 572 return false; 573 } 574 if (size >= MAX_DUMP_STACK_SIZE) { 575 LOG(ERROR) << "dump stack size " << size 576 << " is bigger than max allowed size " 577 << MAX_DUMP_STACK_SIZE << "."; 578 return false; 579 } 580 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size); 581 } 582 } else { 583 LOG(ERROR) << "unexpected argument for --call-graph option: " 584 << args[i]; 585 return false; 586 } 587 } else if (args[i] == "--clockid") { 588 if (!NextArgumentOrError(args, &i)) { 589 return false; 590 } 591 if (args[i] != "perf") { 592 if (!IsSettingClockIdSupported()) { 593 LOG(ERROR) << "Setting clockid is not supported by the kernel."; 594 return false; 595 } 596 if (clockid_map.find(args[i]) == clockid_map.end()) { 597 LOG(ERROR) << "Invalid clockid: " << args[i]; 598 return false; 599 } 600 } 601 clockid_ = args[i]; 602 } else if (args[i] == "--cpu") { 603 if (!NextArgumentOrError(args, &i)) { 604 return false; 605 } 606 cpus_ = GetCpusFromString(args[i]); 607 } else if (args[i] == "--duration") { 608 if (!NextArgumentOrError(args, &i)) { 609 return false; 610 } 611 if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_, 612 1e-9)) { 613 LOG(ERROR) << "Invalid duration: " << args[i].c_str(); 614 return false; 615 } 616 } else if (args[i] == "-e") { 617 if (!NextArgumentOrError(args, &i)) { 618 return false; 
619 } 620 std::vector<std::string> event_types = android::base::Split(args[i], ","); 621 for (auto& event_type : event_types) { 622 size_t group_id; 623 if (!event_selection_set_.AddEventType(event_type, &group_id)) { 624 return false; 625 } 626 if (sample_speed_) { 627 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_); 628 } else { 629 wait_setting_speed_event_groups_.push_back(group_id); 630 } 631 } 632 } else if (args[i] == "--exit-with-parent") { 633 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0); 634 } else if (args[i] == "-g") { 635 fp_callchain_sampling_ = false; 636 dwarf_callchain_sampling_ = true; 637 } else if (args[i] == "--group") { 638 if (!NextArgumentOrError(args, &i)) { 639 return false; 640 } 641 std::vector<std::string> event_types = android::base::Split(args[i], ","); 642 size_t group_id; 643 if (!event_selection_set_.AddEventGroup(event_types, &group_id)) { 644 return false; 645 } 646 if (sample_speed_) { 647 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_); 648 } else { 649 wait_setting_speed_event_groups_.push_back(group_id); 650 } 651 } else if (args[i] == "--in-app") { 652 in_app_context_ = true; 653 } else if (args[i] == "-j") { 654 if (!NextArgumentOrError(args, &i)) { 655 return false; 656 } 657 std::vector<std::string> branch_sampling_types = 658 android::base::Split(args[i], ","); 659 for (auto& type : branch_sampling_types) { 660 auto it = branch_sampling_type_map.find(type); 661 if (it == branch_sampling_type_map.end()) { 662 LOG(ERROR) << "unrecognized branch sampling filter: " << type; 663 return false; 664 } 665 branch_sampling_ |= it->second; 666 } 667 } else if (args[i] == "-m") { 668 if (!NextArgumentOrError(args, &i)) { 669 return false; 670 } 671 char* endptr; 672 uint64_t pages = strtoull(args[i].c_str(), &endptr, 0); 673 if (*endptr != '\0' || !IsPowerOfTwo(pages)) { 674 LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'"; 675 return false; 676 } 677 mmap_page_range_.first = mmap_page_range_.second = 
pages; 678 } else if (args[i] == "--no-dump-kernel-symbols") { 679 can_dump_kernel_symbols_ = false; 680 } else if (args[i] == "--no-dump-symbols") { 681 dump_symbols_ = false; 682 } else if (args[i] == "--no-inherit") { 683 child_inherit_ = false; 684 } else if (args[i] == "--no-unwind") { 685 unwind_dwarf_callchain_ = false; 686 } else if (args[i] == "--no-callchain-joiner") { 687 allow_callchain_joiner_ = false; 688 } else if (args[i] == "--callchain-joiner-min-matching-nodes") { 689 if (!NextArgumentOrError(args, &i)) { 690 return false; 691 } 692 if (!android::base::ParseUint(args[i].c_str(), &callchain_joiner_min_matching_nodes_) || 693 callchain_joiner_min_matching_nodes_ < 1u) { 694 LOG(ERROR) << "unexpected argument for " << args[i - 1] << " option"; 695 return false; 696 } 697 } else if (args[i] == "-o") { 698 if (!NextArgumentOrError(args, &i)) { 699 return false; 700 } 701 record_filename_ = args[i]; 702 } else if (args[i] == "-p") { 703 if (!NextArgumentOrError(args, &i)) { 704 return false; 705 } 706 std::set<pid_t> pids; 707 if (!GetValidThreadsFromThreadString(args[i], &pids)) { 708 return false; 709 } 710 event_selection_set_.AddMonitoredProcesses(pids); 711 } else if (args[i] == "--no-post-unwind") { 712 post_unwind_ = false; 713 } else if (args[i] == "--start_profiling_fd") { 714 if (!NextArgumentOrError(args, &i)) { 715 return false; 716 } 717 if (!android::base::ParseInt(args[i].c_str(), &start_profiling_fd_, 0)) { 718 LOG(ERROR) << "Invalid start_profiling_fd: " << args[i]; 719 return false; 720 } 721 } else if (args[i] == "--symfs") { 722 if (!NextArgumentOrError(args, &i)) { 723 return false; 724 } 725 if (!Dso::SetSymFsDir(args[i])) { 726 return false; 727 } 728 } else if (args[i] == "-t") { 729 if (!NextArgumentOrError(args, &i)) { 730 return false; 731 } 732 std::set<pid_t> tids; 733 if (!GetValidThreadsFromThreadString(args[i], &tids)) { 734 return false; 735 } 736 event_selection_set_.AddMonitoredThreads(tids); 737 } else if (args[i] == 
"--trace-offcpu") { 738 trace_offcpu_ = true; 739 } else if (args[i] == "--tracepoint-events") { 740 if (!NextArgumentOrError(args, &i)) { 741 return false; 742 } 743 if (!SetTracepointEventsFilePath(args[i])) { 744 return false; 745 } 746 } else if (args[i] == "--") { 747 i++; 748 break; 749 } else { 750 ReportUnknownOption(args, i); 751 return false; 752 } 753 } 754 755 if (!dwarf_callchain_sampling_) { 756 if (!unwind_dwarf_callchain_) { 757 LOG(ERROR) 758 << "--no-unwind is only used with `--call-graph dwarf` option."; 759 return false; 760 } 761 unwind_dwarf_callchain_ = false; 762 } 763 if (post_unwind_) { 764 if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) { 765 post_unwind_ = false; 766 } 767 } else { 768 if (!dwarf_callchain_sampling_) { 769 LOG(ERROR) 770 << "--no-post-unwind is only used with `--call-graph dwarf` option."; 771 return false; 772 } 773 if (!unwind_dwarf_callchain_) { 774 LOG(ERROR) << "--no-post-unwind can't be used with `--no-unwind` option."; 775 return false; 776 } 777 } 778 779 if (fp_callchain_sampling_) { 780 if (GetBuildArch() == ARCH_ARM) { 781 LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, " 782 << "consider using `-g` option or profiling on aarch64 architecture."; 783 } 784 } 785 786 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) { 787 LOG(ERROR) << "Record system wide and existing processes/threads can't be " 788 "used at the same time."; 789 return false; 790 } 791 792 if (system_wide_collection_ && !IsRoot()) { 793 LOG(ERROR) << "System wide profiling needs root privilege."; 794 return false; 795 } 796 797 if (dump_symbols_ && can_dump_kernel_symbols_) { 798 // No need to dump kernel symbols as we will dump all required symbols. 
    can_dump_kernel_symbols_ = false;
  }

  // Everything after the options (and an optional "--") is the workload command.
  non_option_args->clear();
  for (; i < args.size(); ++i) {
    non_option_args->push_back(args[i]);
  }
  return true;
}

// Implements --trace-offcpu by adding a sched:sched_switch tracepoint event,
// after checking that the event exists, wasn't already requested by the user,
// and that the kernel can dump registers for tracepoint events.
bool RecordCommand::TraceOffCpu() {
  if (FindEventTypeByName("sched:sched_switch") == nullptr) {
    LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
    return false;
  }
  for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
    if (event_type->name == "sched:sched_switch") {
      LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
      return false;
    }
  }
  if (!IsDumpingRegsForTracepointEventsSupported()) {
    LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
    return false;
  }
  return event_selection_set_.AddEventType("sched:sched_switch");
}

// Applies the parsed options (branch sampling, call graph mode, inherit,
// clockid) to the selected perf events before they are opened.
bool RecordCommand::SetEventSelectionFlags() {
  event_selection_set_.SampleIdAll();
  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    return false;
  }
  if (fp_callchain_sampling_) {
    event_selection_set_.EnableFpCallChainSampling();
  } else if (dwarf_callchain_sampling_) {
    if (!event_selection_set_.EnableDwarfCallChainSampling(
            dump_stack_size_in_dwarf_sampling_)) {
      return false;
    }
  }
  event_selection_set_.SetInherit(child_inherit_);
  if (clockid_ != "perf") {
    event_selection_set_.SetClockId(clockid_map[clockid_]);
  }
  return true;
}

// Creates the output record file and writes the initial records (kernel
// symbols, tracing data, kernel/module and thread mmaps) needed by reporters.
bool RecordCommand::CreateAndInitRecordFile() {
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (record_file_writer_ == nullptr) {
    return false;
  }
  // Use first perf_event_attr and first event id to dump mmap and comm records.
853 EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0]; 854 if (!DumpKernelSymbol()) { 855 return false; 856 } 857 if (!DumpTracingData()) { 858 return false; 859 } 860 if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) { 861 return false; 862 } 863 if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) { 864 return false; 865 } 866 return true; 867 } 868 869 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile( 870 const std::string& filename) { 871 std::unique_ptr<RecordFileWriter> writer = 872 RecordFileWriter::CreateInstance(filename); 873 if (writer == nullptr) { 874 return nullptr; 875 } 876 877 if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) { 878 return nullptr; 879 } 880 return writer; 881 } 882 883 bool RecordCommand::DumpKernelSymbol() { 884 if (can_dump_kernel_symbols_) { 885 std::string kallsyms; 886 if (event_selection_set_.NeedKernelSymbol() && 887 CheckKernelSymbolAddresses()) { 888 if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) { 889 PLOG(ERROR) << "failed to read /proc/kallsyms"; 890 return false; 891 } 892 KernelSymbolRecord r(kallsyms); 893 if (!ProcessRecord(&r)) { 894 return false; 895 } 896 } 897 } 898 return true; 899 } 900 901 bool RecordCommand::DumpTracingData() { 902 std::vector<const EventType*> tracepoint_event_types = 903 event_selection_set_.GetTracepointEvents(); 904 if (tracepoint_event_types.empty() || !CanRecordRawData()) { 905 return true; // No need to dump tracing data, or can't do it. 
  }
  std::vector<char> tracing_data;
  if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
    return false;
  }
  TracingDataRecord record(tracing_data);
  if (!ProcessRecord(&record)) {
    return false;
  }
  return true;
}

// Dump mmap records for the kernel and its loadable modules, so kernel-space
// samples can be mapped back to the right binary.
bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
                                             uint64_t event_id) {
  KernelMmap kernel_mmap;
  std::vector<KernelMmap> module_mmaps;
  GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);

  // NOTE(review): UINT_MAX appears to be the conventional pid value for
  // kernel mmap records here — confirm against MmapRecord's constructor.
  MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
                         kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
  if (!ProcessRecord(&mmap_record)) {
    return false;
  }
  for (auto& module_mmap : module_mmaps) {
    MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr,
                           module_mmap.len, 0, module_mmap.filepath, event_id);
    if (!ProcessRecord(&mmap_record)) {
      return false;
    }
  }
  return true;
}

// Dump comm and mmap records describing the monitored processes/threads,
// so samples can be attributed to threads and binaries.
bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
                                           uint64_t event_id) {
  // Decide which processes and threads to dump.
  // For system_wide profiling, dump all threads.
  // For non system wide profiling, build dump_threads.
  bool all_threads = system_wide_collection_;
  std::set<pid_t> dump_threads = event_selection_set_.GetMonitoredThreads();
  for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) {
    std::vector<pid_t> tids = GetThreadsInProcess(pid);
    dump_threads.insert(tids.begin(), tids.end());
  }

  // Collect processes to dump.
  std::vector<pid_t> processes;
  if (all_threads) {
    processes = GetAllProcesses();
  } else {
    // Map each monitored thread back to its owning process. A thread may
    // already have exited, in which case it is silently skipped.
    std::set<pid_t> process_set;
    for (const auto& tid : dump_threads) {
      pid_t pid;
      if (!GetProcessForThread(tid, &pid)) {
        continue;
      }
      process_set.insert(pid);
    }
    processes.insert(processes.end(), process_set.begin(), process_set.end());
  }

  // Dump each process and its threads.
  for (auto& pid : processes) {
    // Dump mmap records.
    std::vector<ThreadMmap> thread_mmaps;
    if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
      // The process may exit before we get its info.
      continue;
    }
    for (const auto& map : thread_mmaps) {
      if (map.executable == 0) {
        continue;  // No need to dump non-executable mmap info.
      }
      MmapRecord record(attr, false, pid, pid, map.start_addr, map.len,
                        map.pgoff, map.name, event_id);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump process name.
    std::string name;
    if (GetThreadName(pid, &name)) {
      CommRecord record(attr, pid, pid, name, event_id, 0);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump thread info.
    std::vector<pid_t> threads = GetThreadsInProcess(pid);
    for (const auto& tid : threads) {
      // The main thread (tid == pid) already got a comm record above.
      if (tid == pid) {
        continue;
      }
      if (all_threads || dump_threads.find(tid) != dump_threads.end()) {
        ForkRecord fork_record(attr, pid, tid, pid, pid, event_id);
        if (!ProcessRecord(&fork_record)) {
          return false;
        }
        if (GetThreadName(tid, &name)) {
          CommRecord comm_record(attr, pid, tid, name, event_id, 0);
          if (!ProcessRecord(&comm_record)) {
            return false;
          }
        }
      }
    }
  }
  return true;
}

// Route a record to the right handler depending on how dwarf callchains are
// processed: saved raw for post unwinding, unwound immediately, or written
// out without unwinding.
bool RecordCommand::ProcessRecord(Record* record) {
  if (unwind_dwarf_callchain_) {
    if (post_unwind_) {
      return SaveRecordForPostUnwinding(record);
    }
    return SaveRecordAfterUnwinding(record);
  }
  return SaveRecordWithoutUnwinding(record);
}

// Write a record to be unwound later (see PostUnwindRecords()). Sample
// records drop their invalid stack data first.
bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    static_cast<SampleRecord*>(record)->RemoveInvalidStackData();
  }
  if (!record_file_writer_->WriteRecord(*record)) {
    LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
               << "--no-post-unwind option.";
    return false;
  }
  return true;
}

// Unwind a sample record's dwarf callchain immediately, then write it out.
// Also keeps thread_tree_ updated from non-sample records, which the
// unwinder relies on for thread/map lookups.
bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
    // to adjust callchains generated by dwarf unwinder.
    r.AdjustCallChainGeneratedByKernel();
    if (!UnwindRecord(r)) {
      return false;
    }
    // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
    // chain.
    if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  } else {
    // Non-sample records can change the thread/map state used by
    // UnwindRecord(), so update thread_tree_ before writing them out.
    UpdateRecordForEmbeddedElfPath(record);
    thread_tree_.Update(*record);
  }
  return record_file_writer_->WriteRecord(*record);
}

// Write a record without dwarf unwinding (fp callchains, or no unwinding
// requested). Samples with only kernel frames may be skipped when
// exclude_kernel_callchain_ is set.
bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
      r.AdjustCallChainGeneratedByKernel();
    }
    if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  }
  return record_file_writer_->WriteRecord(*record);
}

// Shared implementation for MmapRecord and Mmap2Record: rewrite the mmap'ed
// filename/offset when the mapping points into an ELF embedded in an APK.
template <class RecordType>
void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
  RecordType& r = *record;
  if (!r.InKernel() && r.data->pgoff != 0) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee =
        ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute new offset relative to start of elf in APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name()));
    }
  }
}

// Rewrite mmap/mmap2 records so that libraries embedded in APKs are
// reported with an explicit "apk!/lib.so" style path.
void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
  if (record->type() == PERF_RECORD_MMAP) {
    UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
  } else if (record->type() == PERF_RECORD_MMAP2) {
    UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
  }
}

// Unwind the sampled user stack and replace the record's regs/stack data
// with the resulting callchain. When callchain joining is enabled, the
// original chain is also fed to the joiner for later merging.
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  // Only unwind when the sample actually carries user regs and a non-empty
  // user stack along with a callchain.
  if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
      (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) &&
      (r.sample_type & PERF_SAMPLE_STACK_USER) &&
      (r.GetValidStackSize() > 0)) {
    ThreadEntry* thread =
        thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    r.ReplaceRegAndStackWithCallChain(ips);
    if (callchain_joiner_) {
      return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                             CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
    }
  }
  return true;
}

// Unwind the records that SaveRecordForPostUnwinding() saved raw: move the
// recorded data aside to a temporary file, then rewrite record_filename_
// with unwound records.
bool RecordCommand::PostUnwindRecords() {
  // 1. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 2. Read records from the temporary file, and write unwound records back to record_filename_.
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!record_file_writer_) {
    return false;
  }
  // Reset counters so they only reflect this final writing pass.
  sample_record_count_ = 0;
  lost_record_count_ = 0;
  auto callback = [this](std::unique_ptr<Record> record) {
    return SaveRecordAfterUnwinding(record.get());
  };
  return reader->ReadDataSection(callback, false);
}

// Replace each sample's user callchain with the joined callchain produced
// by callchain_joiner_, rewriting the record file via a temporary file.
bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }

  // 3. Read records from the temporary file, and write record with joined call chains back
  // to record_filename_.
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!reader || !record_file_writer_) {
    return false;
  }

  auto record_callback = [&](std::unique_ptr<Record> r) {
    // Non-sample records, and samples without a user callchain, pass
    // through unchanged.
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    // NOTE(review): GetNextCallChain() is expected to yield chains in the
    // same order samples were added; the CHECK_EQs below verify that the
    // pid/tid of the joined chain agree with the current sample.
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback, false);
}

// Write the feature sections (build ids, optional file/symbol info, os
// release, arch, cmdline, optional branch stack, meta info) at the end of
// the record file.
bool RecordCommand::DumpAdditionalFeatures(
    const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  if (CheckKernelSymbolAddresses()) {
    Dso::ReadKernelSymbolsFromProc();
  }
  // Replay the recorded data to rebuild the thread tree and mark the
  // dsos/symbols hit by samples (see CollectHitFileInfo()).
  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
    }
  };
  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  // Base features: build id, os release, arch, cmdline and meta info;
  // branch stack and file features are added conditionally below.
  size_t feature_count = 5;
  if (branch_sampling_) {
    feature_count++;
  }
  if (dump_symbols_) {
    feature_count++;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (dump_symbols_ && !DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
                                               uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
                                               uname_buf.machine)) {
    return false;
  }

  // Record the full command line so reports can show how the profile was
  // produced; fall back to a bare "simpleperf" if the executable path is
  // unavailable.
  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 &&
      !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature()) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}

// Write build id records for the kernel, kernel modules and user binaries
// that were hit by samples.
bool RecordCommand::DumpBuildIdFeature() {
  std::vector<BuildIdRecord> build_id_records;
  BuildId build_id;
std::vector<Dso*> dso_v = thread_tree_.GetAllDsos(); 1287 for (Dso* dso : dso_v) { 1288 if (!dso->HasDumpId()) { 1289 continue; 1290 } 1291 if (dso->type() == DSO_KERNEL) { 1292 if (!GetKernelBuildId(&build_id)) { 1293 continue; 1294 } 1295 build_id_records.push_back( 1296 BuildIdRecord(true, UINT_MAX, build_id, dso->Path())); 1297 } else if (dso->type() == DSO_KERNEL_MODULE) { 1298 std::string path = dso->Path(); 1299 std::string module_name = basename(&path[0]); 1300 if (android::base::EndsWith(module_name, ".ko")) { 1301 module_name = module_name.substr(0, module_name.size() - 3); 1302 } 1303 if (!GetModuleBuildId(module_name, &build_id)) { 1304 LOG(DEBUG) << "can't read build_id for module " << module_name; 1305 continue; 1306 } 1307 build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path)); 1308 } else { 1309 if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) { 1310 continue; 1311 } 1312 auto tuple = SplitUrlInApk(dso->Path()); 1313 if (std::get<0>(tuple)) { 1314 ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple), 1315 std::get<2>(tuple), &build_id); 1316 if (result != ElfStatus::NO_ERROR) { 1317 LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": " 1318 << result; 1319 continue; 1320 } 1321 } else { 1322 ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id); 1323 if (result != ElfStatus::NO_ERROR) { 1324 LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": " 1325 << result; 1326 continue; 1327 } 1328 } 1329 build_id_records.push_back( 1330 BuildIdRecord(false, UINT_MAX, build_id, dso->Path())); 1331 } 1332 } 1333 if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) { 1334 return false; 1335 } 1336 return true; 1337 } 1338 1339 bool RecordCommand::DumpFileFeature() { 1340 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos(); 1341 return record_file_writer_->WriteFileFeatures(thread_tree_.GetAllDsos()); 1342 } 1343 1344 bool RecordCommand::DumpMetaInfoFeature() { 1345 
  std::unordered_map<std::string, std::string> info_map;
  info_map["simpleperf_version"] = GetSimpleperfVersion();
  info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
  info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
  // By storing event types information in perf.data, the readers of perf.data have the same
  // understanding of event types, even if they are on another machine.
  info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
#if defined(__ANDROID__)
  // Device identification, only available when running on Android.
  info_map["product_props"] = android::base::StringPrintf("%s:%s:%s",
      android::base::GetProperty("ro.product.manufacturer", "").c_str(),
      android::base::GetProperty("ro.product.model", "").c_str(),
      android::base::GetProperty("ro.product.name", "").c_str());
  info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
#endif
  info_map["clockid"] = clockid_;
  info_map["timestamp"] = std::to_string(time(nullptr));
  return record_file_writer_->WriteMetaInfoFeature(info_map);
}

// Mark the dso (and, when dump_symbols_ is set, the symbol) hit by a sample
// and by every entry of its callchain, so that only hit files/symbols get
// dumped in the feature sections.
void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
  const ThreadEntry* thread =
      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  const MapEntry* map =
      thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
  Dso* dso = map->dso;
  const Symbol* symbol;
  if (dump_symbols_) {
    symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
    if (!symbol->HasDumpId()) {
      dso->CreateSymbolDumpId(symbol);
    }
  }
  if (!dso->HasDumpId()) {
    dso->CreateDumpId();
  }
  if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
    bool in_kernel = r.InKernel();
    bool first_ip = true;
    for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
      uint64_t ip = r.callchain_data.ips[i];
      // Entries >= PERF_CONTEXT_MAX are context markers switching between
      // kernel and user parts of the callchain, not real addresses.
      if (ip >= PERF_CONTEXT_MAX) {
        switch (ip) {
          case PERF_CONTEXT_KERNEL:
            in_kernel = true;
            break;
          case PERF_CONTEXT_USER:
            in_kernel = false;
            break;
          default:
            LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
                       << ip;
        }
      } else {
        if (first_ip) {
          first_ip = false;
          // Remove duplication with sample ip.
          if (ip == r.ip_data.ip) {
            continue;
          }
        }
        map = thread_tree_.FindMap(thread, ip, in_kernel);
        dso = map->dso;
        if (dump_symbols_) {
          symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
          if (!symbol->HasDumpId()) {
            dso->CreateSymbolDumpId(symbol);
          }
        }
        if (!dso->HasDumpId()) {
          dso->CreateDumpId();
        }
      }
    }
  }
}

// Register the "record" subcommand with the global command registry.
void RegisterRecordCommand() {
  RegisterCommand("record",
                  [] { return std::unique_ptr<Command>(new RecordCommand()); });
}