Home | History | Annotate | Download | only in simpleperf
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
#include <errno.h>
#include <libgen.h>
#include <poll.h>
#include <signal.h>
#include <sys/utsname.h>
#include <unistd.h>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
     26 
     27 #include <android-base/logging.h>
     28 #include <android-base/strings.h>
     29 
     30 #include "command.h"
     31 #include "dwarf_unwind.h"
     32 #include "environment.h"
     33 #include "event_selection_set.h"
     34 #include "event_type.h"
     35 #include "read_apk.h"
     36 #include "read_elf.h"
     37 #include "record.h"
     38 #include "record_file.h"
     39 #include "scoped_signal_handler.h"
     40 #include "thread_tree.h"
     41 #include "utils.h"
     42 #include "workload.h"
     43 
// Event type that RecordCommand::Run() samples when no event type has been
// selected through the -e option.
static std::string default_measured_event_type = "cpu-cycles";

// Maps the branch filter names accepted by the -j option (and "any" for -b)
// to the corresponding PERF_SAMPLE_BRANCH_* bits.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};
     54 
     55 static volatile bool signaled;
     56 static void signal_handler(int) {
     57   signaled = true;
     58 }
     59 
// Used in cpu-hotplug test.
// Set to true by RecordCommand::Run() when opening the perf event files for
// system wide collection (-a) fails.
bool system_wide_perf_event_open_failed = false;
     62 
     63 class RecordCommand : public Command {
     64  public:
     65   RecordCommand()
     66       : Command(
     67             "record", "record sampling info in perf.data",
     68             "Usage: simpleperf record [options] [command [command-args]]\n"
     69             "    Gather sampling information when running [command].\n"
     70             "    -a           System-wide collection.\n"
     71             "    -b           Enable take branch stack sampling. Same as '-j any'\n"
     72             "    -c count     Set event sample period.\n"
     73             "    --call-graph fp | dwarf[,<dump_stack_size>]\n"
     74             "                 Enable call graph recording. Use frame pointer or dwarf as the\n"
     75             "                 method to parse call graph in stack. Default is dwarf,8192.\n"
     76             "    --cpu cpu_item1,cpu_item2,...\n"
     77             "                 Collect samples only on the selected cpus. cpu_item can be cpu\n"
     78             "                 number like 1, or cpu range like 0-3.\n"
     79             "    -e event1[:modifier1],event2[:modifier2],...\n"
     80             "                 Select the event list to sample. Use `simpleperf list` to find\n"
     81             "                 all possible event names. Modifiers can be added to define\n"
     82             "                 how the event should be monitored. Possible modifiers are:\n"
     83             "                   u - monitor user space events only\n"
     84             "                   k - monitor kernel space events only\n"
     85             "    -f freq      Set event sample frequency.\n"
     86             "    -F freq      Same as '-f freq'.\n"
     87             "    -g           Same as '--call-graph dwarf'.\n"
     88             "    -j branch_filter1,branch_filter2,...\n"
     89             "                 Enable taken branch stack sampling. Each sample\n"
     90             "                 captures a series of consecutive taken branches.\n"
     91             "                 The following filters are defined:\n"
     92             "                   any: any type of branch\n"
     93             "                   any_call: any function call or system call\n"
     94             "                   any_ret: any function return or system call return\n"
     95             "                   ind_call: any indirect branch\n"
     96             "                   u: only when the branch target is at the user level\n"
     97             "                   k: only when the branch target is in the kernel\n"
     98             "                 This option requires at least one branch type among any,\n"
     99             "                 any_call, any_ret, ind_call.\n"
    100             "    -m mmap_pages\n"
    101             "                 Set the size of the buffer used to receiving sample data from\n"
    102             "                 the kernel. It should be a power of 2. The default value is 16.\n"
    103             "    --no-inherit\n"
    104             "                 Don't record created child threads/processes.\n"
    105             "    --no-unwind  If `--call-graph dwarf` option is used, then the user's stack will\n"
    106             "                 be unwound by default. Use this option to disable the unwinding of\n"
    107             "                 the user's stack.\n"
    108             "    -o record_file_name    Set record file name, default is perf.data.\n"
    109             "    -p pid1,pid2,...\n"
    110             "                 Record events on existing processes. Mutually exclusive with -a.\n"
    111             "    --post-unwind\n"
    112             "                 If `--call-graph dwarf` option is used, then the user's stack will\n"
    113             "                 be unwound while recording by default. But it may lose records as\n"
    114             "                 stacking unwinding can be time consuming. Use this option to unwind\n"
    115             "                 the user's stack after recording.\n"
    116             "    -t tid1,tid2,...\n"
    117             "                 Record events on existing threads. Mutually exclusive with -a.\n"),
    118         use_sample_freq_(true),
    119         sample_freq_(4000),
    120         system_wide_collection_(false),
    121         branch_sampling_(0),
    122         fp_callchain_sampling_(false),
    123         dwarf_callchain_sampling_(false),
    124         dump_stack_size_in_dwarf_sampling_(8192),
    125         unwind_dwarf_callchain_(true),
    126         post_unwind_(false),
    127         child_inherit_(true),
    128         perf_mmap_pages_(16),
    129         record_filename_("perf.data"),
    130         sample_record_count_(0) {
    131     signaled = false;
    132     scoped_signal_handler_.reset(
    133         new ScopedSignalHandler({SIGCHLD, SIGINT, SIGTERM}, signal_handler));
    134   }
    135 
    136   bool Run(const std::vector<std::string>& args);
    137 
    138   static bool ReadMmapDataCallback(const char* data, size_t size);
    139 
    140  private:
    141   bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args);
    142   bool AddMeasuredEventType(const std::string& event_type_name);
    143   bool SetEventSelection();
    144   bool CreateAndInitRecordFile();
    145   std::unique_ptr<RecordFileWriter> CreateRecordFile(const std::string& filename);
    146   bool DumpKernelAndModuleMmaps();
    147   bool DumpThreadCommAndMmaps(bool all_threads, const std::vector<pid_t>& selected_threads);
    148   bool CollectRecordsFromKernel(const char* data, size_t size);
    149   bool ProcessRecord(Record* record);
    150   void UpdateRecordForEmbeddedElfPath(Record* record);
    151   void UnwindRecord(Record* record);
    152   bool PostUnwind(const std::vector<std::string>& args);
    153   bool DumpAdditionalFeatures(const std::vector<std::string>& args);
    154   bool DumpBuildIdFeature();
    155   void CollectHitFileInfo(Record* record);
    156   std::pair<std::string, uint64_t> TestForEmbeddedElf(Dso *dso, uint64_t pgoff);
    157 
    158   bool use_sample_freq_;    // Use sample_freq_ when true, otherwise using sample_period_.
    159   uint64_t sample_freq_;    // Sample 'sample_freq_' times per second.
    160   uint64_t sample_period_;  // Sample once when 'sample_period_' events occur.
    161 
    162   bool system_wide_collection_;
    163   uint64_t branch_sampling_;
    164   bool fp_callchain_sampling_;
    165   bool dwarf_callchain_sampling_;
    166   uint32_t dump_stack_size_in_dwarf_sampling_;
    167   bool unwind_dwarf_callchain_;
    168   bool post_unwind_;
    169   bool child_inherit_;
    170   std::vector<pid_t> monitored_threads_;
    171   std::vector<int> cpus_;
    172   std::vector<EventTypeAndModifier> measured_event_types_;
    173   EventSelectionSet event_selection_set_;
    174 
    175   // mmap pages used by each perf event file, should be a power of 2.
    176   size_t perf_mmap_pages_;
    177 
    178   std::unique_ptr<RecordCache> record_cache_;
    179   ThreadTree thread_tree_;
    180   std::string record_filename_;
    181   std::unique_ptr<RecordFileWriter> record_file_writer_;
    182 
    183   std::set<std::string> hit_kernel_modules_;
    184   std::set<std::string> hit_user_files_;
    185 
    186   std::unique_ptr<ScopedSignalHandler> scoped_signal_handler_;
    187   uint64_t sample_record_count_;
    188 };
    189 
    190 bool RecordCommand::Run(const std::vector<std::string>& args) {
    191   if (!CheckPerfEventLimit()) {
    192     return false;
    193   }
    194 
    195   // 1. Parse options, and use default measured event type if not given.
    196   std::vector<std::string> workload_args;
    197   if (!ParseOptions(args, &workload_args)) {
    198     return false;
    199   }
    200   if (measured_event_types_.empty()) {
    201     if (!AddMeasuredEventType(default_measured_event_type)) {
    202       return false;
    203     }
    204   }
    205   if (!SetEventSelection()) {
    206     return false;
    207   }
    208 
    209   // 2. Create workload.
    210   std::unique_ptr<Workload> workload;
    211   if (!workload_args.empty()) {
    212     workload = Workload::CreateWorkload(workload_args);
    213     if (workload == nullptr) {
    214       return false;
    215     }
    216   }
    217   if (!system_wide_collection_ && monitored_threads_.empty()) {
    218     if (workload != nullptr) {
    219       monitored_threads_.push_back(workload->GetPid());
    220       event_selection_set_.SetEnableOnExec(true);
    221     } else {
    222       LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help\n";
    223       return false;
    224     }
    225   }
    226 
    227   // 3. Open perf_event_files, create memory mapped buffers for perf_event_files, add prepare poll
    228   //    for perf_event_files.
    229   if (system_wide_collection_) {
    230     if (!event_selection_set_.OpenEventFilesForCpus(cpus_)) {
    231       system_wide_perf_event_open_failed = true;
    232       return false;
    233     }
    234   } else {
    235     if (!event_selection_set_.OpenEventFilesForThreadsOnCpus(monitored_threads_, cpus_)) {
    236       return false;
    237     }
    238   }
    239   if (!event_selection_set_.MmapEventFiles(perf_mmap_pages_)) {
    240     return false;
    241   }
    242   std::vector<pollfd> pollfds;
    243   event_selection_set_.PreparePollForEventFiles(&pollfds);
    244 
    245   // 4. Create perf.data.
    246   if (!CreateAndInitRecordFile()) {
    247     return false;
    248   }
    249 
    250   // 5. Write records in mmap buffers of perf_event_files to output file while workload is running.
    251   if (workload != nullptr && !workload->Start()) {
    252     return false;
    253   }
    254   record_cache_.reset(
    255       new RecordCache(*event_selection_set_.FindEventAttrByType(measured_event_types_[0])));
    256   auto callback = std::bind(&RecordCommand::CollectRecordsFromKernel, this, std::placeholders::_1,
    257                             std::placeholders::_2);
    258   while (true) {
    259     if (!event_selection_set_.ReadMmapEventData(callback)) {
    260       return false;
    261     }
    262     if (signaled) {
    263       break;
    264     }
    265     poll(&pollfds[0], pollfds.size(), -1);
    266   }
    267   std::vector<std::unique_ptr<Record>> records = record_cache_->PopAll();
    268   for (auto& r : records) {
    269     if (!ProcessRecord(r.get())) {
    270       return false;
    271     }
    272   }
    273 
    274   // 6. Dump additional features, and close record file.
    275   if (!DumpAdditionalFeatures(args)) {
    276     return false;
    277   }
    278   if (!record_file_writer_->Close()) {
    279     return false;
    280   }
    281 
    282   // 7. Unwind dwarf callchain.
    283   if (post_unwind_) {
    284     if (!PostUnwind(args)) {
    285       return false;
    286     }
    287   }
    288   LOG(VERBOSE) << "Record " << sample_record_count_ << " samples.";
    289   return true;
    290 }
    291 
    292 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
    293                                  std::vector<std::string>* non_option_args) {
    294   std::set<pid_t> tid_set;
    295   size_t i;
    296   for (i = 0; i < args.size() && args[i].size() > 0 && args[i][0] == '-'; ++i) {
    297     if (args[i] == "-a") {
    298       system_wide_collection_ = true;
    299     } else if (args[i] == "-b") {
    300       branch_sampling_ = branch_sampling_type_map["any"];
    301     } else if (args[i] == "-c") {
    302       if (!NextArgumentOrError(args, &i)) {
    303         return false;
    304       }
    305       char* endptr;
    306       sample_period_ = strtoull(args[i].c_str(), &endptr, 0);
    307       if (*endptr != '\0' || sample_period_ == 0) {
    308         LOG(ERROR) << "Invalid sample period: '" << args[i] << "'";
    309         return false;
    310       }
    311       use_sample_freq_ = false;
    312     } else if (args[i] == "--call-graph") {
    313       if (!NextArgumentOrError(args, &i)) {
    314         return false;
    315       }
    316       std::vector<std::string> strs = android::base::Split(args[i], ",");
    317       if (strs[0] == "fp") {
    318         fp_callchain_sampling_ = true;
    319         dwarf_callchain_sampling_ = false;
    320       } else if (strs[0] == "dwarf") {
    321         fp_callchain_sampling_ = false;
    322         dwarf_callchain_sampling_ = true;
    323         if (strs.size() > 1) {
    324           char* endptr;
    325           uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
    326           if (*endptr != '\0' || size > UINT_MAX) {
    327             LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
    328             return false;
    329           }
    330           if ((size & 7) != 0) {
    331             LOG(ERROR) << "dump stack size " << size << " is not 8-byte aligned.";
    332             return false;
    333           }
    334           dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
    335         }
    336       } else {
    337         LOG(ERROR) << "unexpected argument for --call-graph option: " << args[i];
    338         return false;
    339       }
    340     } else if (args[i] == "--cpu") {
    341       if (!NextArgumentOrError(args, &i)) {
    342         return false;
    343       }
    344       cpus_ = GetCpusFromString(args[i]);
    345     } else if (args[i] == "-e") {
    346       if (!NextArgumentOrError(args, &i)) {
    347         return false;
    348       }
    349       std::vector<std::string> event_types = android::base::Split(args[i], ",");
    350       for (auto& event_type : event_types) {
    351         if (!AddMeasuredEventType(event_type)) {
    352           return false;
    353         }
    354       }
    355     } else if (args[i] == "-f" || args[i] == "-F") {
    356       if (!NextArgumentOrError(args, &i)) {
    357         return false;
    358       }
    359       char* endptr;
    360       sample_freq_ = strtoull(args[i].c_str(), &endptr, 0);
    361       if (*endptr != '\0' || sample_freq_ == 0) {
    362         LOG(ERROR) << "Invalid sample frequency: '" << args[i] << "'";
    363         return false;
    364       }
    365       use_sample_freq_ = true;
    366     } else if (args[i] == "-g") {
    367       fp_callchain_sampling_ = false;
    368       dwarf_callchain_sampling_ = true;
    369     } else if (args[i] == "-j") {
    370       if (!NextArgumentOrError(args, &i)) {
    371         return false;
    372       }
    373       std::vector<std::string> branch_sampling_types = android::base::Split(args[i], ",");
    374       for (auto& type : branch_sampling_types) {
    375         auto it = branch_sampling_type_map.find(type);
    376         if (it == branch_sampling_type_map.end()) {
    377           LOG(ERROR) << "unrecognized branch sampling filter: " << type;
    378           return false;
    379         }
    380         branch_sampling_ |= it->second;
    381       }
    382     } else if (args[i] == "-m") {
    383       if (!NextArgumentOrError(args, &i)) {
    384         return false;
    385       }
    386       char* endptr;
    387       uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
    388       if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
    389         LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
    390         return false;
    391       }
    392       perf_mmap_pages_ = pages;
    393     } else if (args[i] == "--no-inherit") {
    394       child_inherit_ = false;
    395     } else if (args[i] == "--no-unwind") {
    396       unwind_dwarf_callchain_ = false;
    397     } else if (args[i] == "-o") {
    398       if (!NextArgumentOrError(args, &i)) {
    399         return false;
    400       }
    401       record_filename_ = args[i];
    402     } else if (args[i] == "-p") {
    403       if (!NextArgumentOrError(args, &i)) {
    404         return false;
    405       }
    406       if (!GetValidThreadsFromProcessString(args[i], &tid_set)) {
    407         return false;
    408       }
    409     } else if (args[i] == "--post-unwind") {
    410       post_unwind_ = true;
    411     } else if (args[i] == "-t") {
    412       if (!NextArgumentOrError(args, &i)) {
    413         return false;
    414       }
    415       if (!GetValidThreadsFromThreadString(args[i], &tid_set)) {
    416         return false;
    417       }
    418     } else {
    419       ReportUnknownOption(args, i);
    420       return false;
    421     }
    422   }
    423 
    424   if (!dwarf_callchain_sampling_) {
    425     if (!unwind_dwarf_callchain_) {
    426       LOG(ERROR) << "--no-unwind is only used with `--call-graph dwarf` option.";
    427       return false;
    428     }
    429     unwind_dwarf_callchain_ = false;
    430   }
    431   if (post_unwind_) {
    432     if (!dwarf_callchain_sampling_) {
    433       LOG(ERROR) << "--post-unwind is only used with `--call-graph dwarf` option.";
    434       return false;
    435     }
    436     if (!unwind_dwarf_callchain_) {
    437       LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option.";
    438       return false;
    439     }
    440   }
    441 
    442   monitored_threads_.insert(monitored_threads_.end(), tid_set.begin(), tid_set.end());
    443   if (system_wide_collection_ && !monitored_threads_.empty()) {
    444     LOG(ERROR)
    445         << "Record system wide and existing processes/threads can't be used at the same time.";
    446     return false;
    447   }
    448 
    449   if (non_option_args != nullptr) {
    450     non_option_args->clear();
    451     for (; i < args.size(); ++i) {
    452       non_option_args->push_back(args[i]);
    453     }
    454   }
    455   return true;
    456 }
    457 
    458 bool RecordCommand::AddMeasuredEventType(const std::string& event_type_name) {
    459   std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType(event_type_name);
    460   if (event_type_modifier == nullptr) {
    461     return false;
    462   }
    463   measured_event_types_.push_back(*event_type_modifier);
    464   return true;
    465 }
    466 
    467 bool RecordCommand::SetEventSelection() {
    468   for (auto& event_type : measured_event_types_) {
    469     if (!event_selection_set_.AddEventType(event_type)) {
    470       return false;
    471     }
    472   }
    473   if (use_sample_freq_) {
    474     event_selection_set_.SetSampleFreq(sample_freq_);
    475   } else {
    476     event_selection_set_.SetSamplePeriod(sample_period_);
    477   }
    478   event_selection_set_.SampleIdAll();
    479   if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    480     return false;
    481   }
    482   if (fp_callchain_sampling_) {
    483     event_selection_set_.EnableFpCallChainSampling();
    484   } else if (dwarf_callchain_sampling_) {
    485     if (!event_selection_set_.EnableDwarfCallChainSampling(dump_stack_size_in_dwarf_sampling_)) {
    486       return false;
    487     }
    488   }
    489   event_selection_set_.SetInherit(child_inherit_);
    490   return true;
    491 }
    492 
    493 bool RecordCommand::CreateAndInitRecordFile() {
    494   record_file_writer_ = CreateRecordFile(record_filename_);
    495   if (record_file_writer_ == nullptr) {
    496     return false;
    497   }
    498   if (!DumpKernelAndModuleMmaps()) {
    499     return false;
    500   }
    501   if (!DumpThreadCommAndMmaps(system_wide_collection_, monitored_threads_)) {
    502     return false;
    503   }
    504   return true;
    505 }
    506 
    507 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(const std::string& filename) {
    508   std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(filename);
    509   if (writer == nullptr) {
    510     return nullptr;
    511   }
    512 
    513   std::vector<AttrWithId> attr_ids;
    514   for (auto& event_type : measured_event_types_) {
    515     AttrWithId attr_id;
    516     attr_id.attr = event_selection_set_.FindEventAttrByType(event_type);
    517     CHECK(attr_id.attr != nullptr);
    518     const std::vector<std::unique_ptr<EventFd>>* fds =
    519         event_selection_set_.FindEventFdsByType(event_type);
    520     CHECK(fds != nullptr);
    521     for (auto& fd : *fds) {
    522       attr_id.ids.push_back(fd->Id());
    523     }
    524     attr_ids.push_back(attr_id);
    525   }
    526   if (!writer->WriteAttrSection(attr_ids)) {
    527     return nullptr;
    528   }
    529   return writer;
    530 }
    531 
    532 bool RecordCommand::DumpKernelAndModuleMmaps() {
    533   KernelMmap kernel_mmap;
    534   std::vector<KernelMmap> module_mmaps;
    535   GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
    536 
    537   const perf_event_attr* attr = event_selection_set_.FindEventAttrByType(measured_event_types_[0]);
    538   CHECK(attr != nullptr);
    539   MmapRecord mmap_record = CreateMmapRecord(*attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
    540                                             kernel_mmap.len, 0, kernel_mmap.filepath);
    541   if (!ProcessRecord(&mmap_record)) {
    542     return false;
    543   }
    544   for (auto& module_mmap : module_mmaps) {
    545     MmapRecord mmap_record = CreateMmapRecord(*attr, true, UINT_MAX, 0, module_mmap.start_addr,
    546                                               module_mmap.len, 0, module_mmap.filepath);
    547     if (!ProcessRecord(&mmap_record)) {
    548       return false;
    549     }
    550   }
    551   return true;
    552 }
    553 
    554 bool RecordCommand::DumpThreadCommAndMmaps(bool all_threads,
    555                                            const std::vector<pid_t>& selected_threads) {
    556   std::vector<ThreadComm> thread_comms;
    557   if (!GetThreadComms(&thread_comms)) {
    558     return false;
    559   }
    560   // Decide which processes and threads to dump.
    561   std::set<pid_t> dump_processes;
    562   std::set<pid_t> dump_threads;
    563   for (auto& tid : selected_threads) {
    564     dump_threads.insert(tid);
    565   }
    566   for (auto& thread : thread_comms) {
    567     if (dump_threads.find(thread.tid) != dump_threads.end()) {
    568       dump_processes.insert(thread.pid);
    569     }
    570   }
    571 
    572   const perf_event_attr* attr = event_selection_set_.FindEventAttrByType(measured_event_types_[0]);
    573   CHECK(attr != nullptr);
    574 
    575   // Dump processes.
    576   for (auto& thread : thread_comms) {
    577     if (thread.pid != thread.tid) {
    578       continue;
    579     }
    580     if (!all_threads && dump_processes.find(thread.pid) == dump_processes.end()) {
    581       continue;
    582     }
    583     CommRecord record = CreateCommRecord(*attr, thread.pid, thread.tid, thread.comm);
    584     if (!ProcessRecord(&record)) {
    585       return false;
    586     }
    587     std::vector<ThreadMmap> thread_mmaps;
    588     if (!GetThreadMmapsInProcess(thread.pid, &thread_mmaps)) {
    589       // The thread may exit before we get its info.
    590       continue;
    591     }
    592     for (auto& thread_mmap : thread_mmaps) {
    593       if (thread_mmap.executable == 0) {
    594         continue;  // No need to dump non-executable mmap info.
    595       }
    596       MmapRecord record =
    597           CreateMmapRecord(*attr, false, thread.pid, thread.tid, thread_mmap.start_addr,
    598                            thread_mmap.len, thread_mmap.pgoff, thread_mmap.name);
    599       if (!ProcessRecord(&record)) {
    600         return false;
    601       }
    602     }
    603   }
    604 
    605   // Dump threads.
    606   for (auto& thread : thread_comms) {
    607     if (thread.pid == thread.tid) {
    608       continue;
    609     }
    610     if (!all_threads && dump_threads.find(thread.tid) == dump_threads.end()) {
    611       continue;
    612     }
    613     ForkRecord fork_record = CreateForkRecord(*attr, thread.pid, thread.tid, thread.pid, thread.pid);
    614     if (!ProcessRecord(&fork_record)) {
    615       return false;
    616     }
    617     CommRecord comm_record = CreateCommRecord(*attr, thread.pid, thread.tid, thread.comm);
    618     if (!ProcessRecord(&comm_record)) {
    619       return false;
    620     }
    621   }
    622   return true;
    623 }
    624 
    625 bool RecordCommand::CollectRecordsFromKernel(const char* data, size_t size) {
    626   record_cache_->Push(data, size);
    627   while (true) {
    628     std::unique_ptr<Record> r = record_cache_->Pop();
    629     if (r == nullptr) {
    630       break;
    631     }
    632     if (!ProcessRecord(r.get())) {
    633       return false;
    634     }
    635   }
    636   return true;
    637 }
    638 
    639 bool RecordCommand::ProcessRecord(Record* record) {
    640   UpdateRecordForEmbeddedElfPath(record);
    641   BuildThreadTree(*record, &thread_tree_);
    642   CollectHitFileInfo(record);
    643   if (unwind_dwarf_callchain_ && !post_unwind_) {
    644     UnwindRecord(record);
    645   }
    646   if (record->type() == PERF_RECORD_SAMPLE) {
    647     sample_record_count_++;
    648   }
    649   bool result = record_file_writer_->WriteData(record->BinaryFormat());
    650   return result;
    651 }
    652 
    653 template<class RecordType>
    654 void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
    655   RecordType& r = *record;
    656   bool in_kernel = ((r.header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_KERNEL);
    657   if (!in_kernel && r.data.pgoff != 0) {
    658     // For the case of a shared library "foobar.so" embedded
    659     // inside an APK, we rewrite the original MMAP from
    660     // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    661     // so as to make the library name explicit. This update is
    662     // done here (as part of the record operation) as opposed to
    663     // on the host during the report, since we want to report
    664     // the correct library name even if the the APK in question
    665     // is not present on the host. The new offset W is
    666     // calculated to be with respect to the start of foobar.so,
    667     // not to the start of path.apk.
    668     EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(r.filename, r.data.pgoff);
    669     if (ee != nullptr) {
    670       // Compute new offset relative to start of elf in APK.
    671       r.data.pgoff -= ee->entry_offset();
    672       r.filename = GetUrlInApk(r.filename, ee->entry_name());
    673       r.AdjustSizeBasedOnData();
    674     }
    675   }
    676 }
    677 
    678 void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
    679   if (record->type() == PERF_RECORD_MMAP) {
    680     UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
    681   } else if (record->type() == PERF_RECORD_MMAP2) {
    682     UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
    683   }
    684 }
    685 
    686 void RecordCommand::UnwindRecord(Record* record) {
    687   if (record->type() == PERF_RECORD_SAMPLE) {
    688     SampleRecord& r = *static_cast<SampleRecord*>(record);
    689     if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
    690         (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
    691         (!r.stack_user_data.data.empty())) {
    692       ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    693       RegSet regs = CreateRegSet(r.regs_user_data.reg_mask, r.regs_user_data.regs);
    694       std::vector<char>& stack = r.stack_user_data.data;
    695       std::vector<uint64_t> unwind_ips = UnwindCallChain(GetBuildArch(), *thread, regs, stack);
    696       r.callchain_data.ips.push_back(PERF_CONTEXT_USER);
    697       r.callchain_data.ips.insert(r.callchain_data.ips.end(), unwind_ips.begin(), unwind_ips.end());
    698       r.regs_user_data.abi = 0;
    699       r.regs_user_data.reg_mask = 0;
    700       r.regs_user_data.regs.clear();
    701       r.stack_user_data.data.clear();
    702       r.stack_user_data.dyn_size = 0;
    703       r.AdjustSizeBasedOnData();
    704     }
    705   }
    706 }
    707 
    708 bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
    709   thread_tree_.Clear();
    710   std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(record_filename_);
    711   if (reader == nullptr) {
    712     return false;
    713   }
    714   std::string tmp_filename = record_filename_ + ".tmp";
    715   record_file_writer_ = CreateRecordFile(tmp_filename);
    716   if (record_file_writer_ == nullptr) {
    717     return false;
    718   }
    719   bool result = reader->ReadDataSection(
    720       [this](std::unique_ptr<Record> record) {
    721         BuildThreadTree(*record, &thread_tree_);
    722         UnwindRecord(record.get());
    723         return record_file_writer_->WriteData(record->BinaryFormat());
    724       },
    725       false);
    726   if (!result) {
    727     return false;
    728   }
    729   if (!DumpAdditionalFeatures(args)) {
    730     return false;
    731   }
    732   if (!record_file_writer_->Close()) {
    733     return false;
    734   }
    735 
    736   if (unlink(record_filename_.c_str()) != 0) {
    737     PLOG(ERROR) << "failed to remove " << record_filename_;
    738     return false;
    739   }
    740   if (rename(tmp_filename.c_str(), record_filename_.c_str()) != 0) {
    741     PLOG(ERROR) << "failed to rename " << tmp_filename << " to " << record_filename_;
    742     return false;
    743   }
    744   return true;
    745 }
    746 
    747 bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args) {
    748   size_t feature_count = (branch_sampling_ != 0 ? 5 : 4);
    749   if (!record_file_writer_->WriteFeatureHeader(feature_count)) {
    750     return false;
    751   }
    752   if (!DumpBuildIdFeature()) {
    753     return false;
    754   }
    755   utsname uname_buf;
    756   if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    757     PLOG(ERROR) << "uname() failed";
    758     return false;
    759   }
    760   if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, uname_buf.release)) {
    761     return false;
    762   }
    763   if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, uname_buf.machine)) {
    764     return false;
    765   }
    766 
    767   std::string exec_path = "simpleperf";
    768   GetExecPath(&exec_path);
    769   std::vector<std::string> cmdline;
    770   cmdline.push_back(exec_path);
    771   cmdline.push_back("record");
    772   cmdline.insert(cmdline.end(), args.begin(), args.end());
    773   if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    774     return false;
    775   }
    776   if (branch_sampling_ != 0 && !record_file_writer_->WriteBranchStackFeature()) {
    777     return false;
    778   }
    779   return true;
    780 }
    781 
    782 bool RecordCommand::DumpBuildIdFeature() {
    783   std::vector<BuildIdRecord> build_id_records;
    784   BuildId build_id;
    785   // Add build_ids for kernel/modules.
    786   for (const auto& filename : hit_kernel_modules_) {
    787     if (filename == DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID) {
    788       if (!GetKernelBuildId(&build_id)) {
    789         LOG(DEBUG) << "can't read build_id for kernel";
    790         continue;
    791       }
    792       build_id_records.push_back(
    793           CreateBuildIdRecord(true, UINT_MAX, build_id, DEFAULT_KERNEL_FILENAME_FOR_BUILD_ID));
    794     } else {
    795       std::string path = filename;
    796       std::string module_name = basename(&path[0]);
    797       if (android::base::EndsWith(module_name, ".ko")) {
    798         module_name = module_name.substr(0, module_name.size() - 3);
    799       }
    800       if (!GetModuleBuildId(module_name, &build_id)) {
    801         LOG(DEBUG) << "can't read build_id for module " << module_name;
    802         continue;
    803       }
    804       build_id_records.push_back(CreateBuildIdRecord(true, UINT_MAX, build_id, filename));
    805     }
    806   }
    807   // Add build_ids for user elf files.
    808   for (const auto& filename : hit_user_files_) {
    809     if (filename == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
    810       continue;
    811     }
    812     auto tuple = SplitUrlInApk(filename);
    813     if (std::get<0>(tuple)) {
    814       if (!GetBuildIdFromApkFile(std::get<1>(tuple), std::get<2>(tuple), &build_id)) {
    815         LOG(DEBUG) << "can't read build_id from file " << filename;
    816         continue;
    817       }
    818     } else {
    819       if (!GetBuildIdFromElfFile(filename, &build_id)) {
    820         LOG(DEBUG) << "can't read build_id from file " << filename;
    821         continue;
    822       }
    823     }
    824     build_id_records.push_back(CreateBuildIdRecord(false, UINT_MAX, build_id, filename));
    825   }
    826   if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
    827     return false;
    828   }
    829   return true;
    830 }
    831 
    832 void RecordCommand::CollectHitFileInfo(Record* record) {
    833   if (record->type() == PERF_RECORD_SAMPLE) {
    834     auto r = *static_cast<SampleRecord*>(record);
    835     bool in_kernel = ((r.header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_KERNEL);
    836     const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    837     const MapEntry* map = thread_tree_.FindMap(thread, r.ip_data.ip, in_kernel);
    838     if (in_kernel) {
    839       hit_kernel_modules_.insert(map->dso->Path());
    840     } else {
    841       hit_user_files_.insert(map->dso->Path());
    842     }
    843   }
    844 }
    845 
    846 void RegisterRecordCommand() {
    847   RegisterCommand("record", [] { return std::unique_ptr<Command>(new RecordCommand()); });
    848 }
    849