Home | History | Annotate | Download | only in simpleperf
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <inttypes.h>
     18 #include <libgen.h>
     19 #include <signal.h>
     20 #include <sys/prctl.h>
     21 #include <sys/utsname.h>
     22 #include <time.h>
     23 #include <unistd.h>
     24 #include <set>
     25 #include <string>
     26 #include <unordered_map>
     27 #include <vector>
     28 
     29 #include <android-base/logging.h>
     30 #include <android-base/file.h>
     31 #include <android-base/parsedouble.h>
     32 #include <android-base/parseint.h>
     33 #include <android-base/strings.h>
     34 #include <android-base/test_utils.h>
     35 #if defined(__ANDROID__)
     36 #include <android-base/properties.h>
     37 #endif
     38 
     39 #include "CallChainJoiner.h"
     40 #include "command.h"
     41 #include "environment.h"
     42 #include "event_selection_set.h"
     43 #include "event_type.h"
     44 #include "IOEventLoop.h"
     45 #include "OfflineUnwinder.h"
     46 #include "perf_clock.h"
     47 #include "read_apk.h"
     48 #include "read_elf.h"
     49 #include "record.h"
     50 #include "record_file.h"
     51 #include "thread_tree.h"
     52 #include "tracing.h"
     53 #include "utils.h"
     54 #include "workload.h"
     55 
     56 using namespace simpleperf;
     57 
// Event type added by PrepareRecording() when no event has been selected on
// the command line.
static std::string default_measured_event_type = "cpu-cycles";

// Maps the branch filter names accepted by the -j option (and "any" for -b)
// to the corresponding PERF_SAMPLE_BRANCH_* flags.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps the clock names accepted by the --clockid option to CLOCK_* ids.
// The name "perf" is accepted by the option parser separately and is
// intentionally not part of this map.
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by the kernel is 65535, and the dump stack
// size should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size (in bytes: 8 MiB) used by CallChainJoiner to cache call chains in
// memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;
     87 
     88 class RecordCommand : public Command {
     89  public:
     90   RecordCommand()
     91       : Command(
     92             "record", "record sampling info in perf.data",
     93             // clang-format off
     94 "Usage: simpleperf record [options] [--] [command [command-args]]\n"
     95 "       Gather sampling information of running [command]. And -a/-p/-t option\n"
     96 "       can be used to change target of sampling information.\n"
     97 "       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
     98 "Select monitored threads:\n"
     99 "-a     System-wide collection.\n"
    100 #if defined(__ANDROID__)
    101 "--app package_name    Profile the process of an Android application.\n"
    102 "                      On non-rooted devices, the app must be debuggable,\n"
    103 "                      because we use run-as to switch to the app's context.\n"
    104 #endif
    105 "-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
    106 "                       with -a.\n"
    107 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
    108 "\n"
    109 "Select monitored event types:\n"
    110 "-e event1[:modifier1],event2[:modifier2],...\n"
    111 "             Select the event list to sample. Use `simpleperf list` to find\n"
    112 "             all possible event names. Modifiers can be added to define how\n"
    113 "             the event should be monitored.\n"
    114 "             Possible modifiers are:\n"
    115 "                u - monitor user space events only\n"
    116 "                k - monitor kernel space events only\n"
    117 "--group event1[:modifier],event2[:modifier2],...\n"
    118 "             Similar to -e option. But events specified in the same --group\n"
    119 "             option are monitored as a group, and scheduled in and out at the\n"
    120 "             same time.\n"
    121 "--trace-offcpu   Generate samples when threads are scheduled off cpu.\n"
    122 "                 Similar to \"-c 1 -e sched:sched_switch\".\n"
    123 "\n"
    124 "Select monitoring options:\n"
    125 "-f freq      Set event sample frequency. It means recording at most [freq]\n"
    126 "             samples every second. For non-tracepoint events, the default\n"
    127 "             option is -f 4000. A -f/-c option affects all event types\n"
    128 "             following it until meeting another -f/-c option. For example,\n"
    129 "             for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
    130 "             has sample freq 1000, sched:sched_switch event has sample period 1.\n"
    131 "-c count     Set event sample period. It means recording one sample when\n"
    132 "             [count] events happen. For tracepoint events, the default option\n"
    133 "             is -c 1.\n"
    134 "--call-graph fp | dwarf[,<dump_stack_size>]\n"
    135 "             Enable call graph recording. Use frame pointer or dwarf debug\n"
    136 "             frame as the method to parse call graph in stack.\n"
    137 "             Default is dwarf,65528.\n"
    138 "-g           Same as '--call-graph dwarf'.\n"
    139 "--clockid clock_id      Generate timestamps of samples using selected clock.\n"
    140 "                        Possible values are: realtime, monotonic,\n"
    141 "                        monotonic_raw, boottime, perf. Default is perf.\n"
    142 "--cpu cpu_item1,cpu_item2,...\n"
    143 "             Collect samples only on the selected cpus. cpu_item can be cpu\n"
    144 "             number like 1, or cpu range like 0-3.\n"
    145 "--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
    146 "                        [command]. Here time_in_sec may be any positive\n"
    147 "                        floating point number.\n"
    148 "-j branch_filter1,branch_filter2,...\n"
    149 "             Enable taken branch stack sampling. Each sample captures a series\n"
    150 "             of consecutive taken branches.\n"
    151 "             The following filters are defined:\n"
    152 "                any: any type of branch\n"
    153 "                any_call: any function call or system call\n"
    154 "                any_ret: any function return or system call return\n"
    155 "                ind_call: any indirect branch\n"
    156 "                u: only when the branch target is at the user level\n"
    157 "                k: only when the branch target is in the kernel\n"
    158 "             This option requires at least one branch type among any, any_call,\n"
    159 "             any_ret, ind_call.\n"
    160 "-b           Enable taken branch stack sampling. Same as '-j any'.\n"
    161 "-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
    162 "                the kernel. It should be a power of 2. If not set, the max\n"
    163 "                possible value <= 1024 will be used.\n"
    164 "--no-inherit  Don't record created child threads/processes.\n"
    165 "\n"
    166 "Dwarf unwinding options:\n"
    167 "--no-post-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
    168 "                   will be recorded in perf.data and unwound after recording.\n"
    169 "                   However, this takes a lot of disk space. Use this option to\n"
    170 "                   unwind while recording.\n"
    171 "--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
    172 "              will be unwound by default. Use this option to disable the\n"
    173 "              unwinding of the user's stack.\n"
    174 "--no-callchain-joiner  If `--call-graph dwarf` option is used, then by default\n"
    175 "                       callchain joiner is used to break the 64k stack limit\n"
    176 "                       and build more complete call graphs. However, the built\n"
    177 "                       call graphs may not be correct in all cases.\n"
    178 "--callchain-joiner-min-matching-nodes count\n"
    179 "               When callchain joiner is used, set the matched nodes needed to join\n"
    180 "               callchains. The count should be >= 1. By default it is 1.\n"
    181 "\n"
    182 "Recording file options:\n"
    183 "--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
    184 "                          kernel symbols will be dumped when needed.\n"
    185 "--no-dump-symbols       Don't dump symbols in perf.data. By default symbols are\n"
    186 "                        dumped in perf.data, to support reporting in another\n"
    187 "                        environment.\n"
    188 "-o record_file_name    Set record file name, default is perf.data.\n"
    189 "--exit-with-parent            Stop recording when the process starting\n"
    190 "                              simpleperf dies.\n"
    191 "--start_profiling_fd fd_no    After starting profiling, write \"STARTED\" to\n"
    192 "                              <fd_no>, then close <fd_no>.\n"
    193 "--symfs <dir>    Look for files with symbols relative to this directory.\n"
    194 "                 This option is used to provide files with symbol table and\n"
    195 "                 debug information, which are used for unwinding and dumping symbols.\n"
    196 #if 0
    197 // Below options are only used internally and shouldn't be visible to the public.
    198 "--in-app         We are already running in the app's context.\n"
    199 "--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
    200 #endif
    201             // clang-format on
    202             ),
    203         system_wide_collection_(false),
    204         branch_sampling_(0),
    205         fp_callchain_sampling_(false),
    206         dwarf_callchain_sampling_(false),
    207         dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
    208         unwind_dwarf_callchain_(true),
    209         post_unwind_(true),
    210         child_inherit_(true),
    211         duration_in_sec_(0),
    212         can_dump_kernel_symbols_(true),
    213         dump_symbols_(true),
    214         clockid_("perf"),
    215         event_selection_set_(false),
    216         mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
    217         record_filename_("perf.data"),
    218         start_sampling_time_in_ns_(0),
    219         sample_record_count_(0),
    220         lost_record_count_(0),
    221         start_profiling_fd_(-1),
    222         in_app_context_(false),
    223         trace_offcpu_(false),
    224         exclude_kernel_callchain_(false),
    225         allow_callchain_joiner_(true),
    226         callchain_joiner_min_matching_nodes_(1u) {
    227     // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
    228     // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
    229     // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
    230     // finish properly.
    231     signal(SIGPIPE, SIG_IGN);
    232     app_package_name_ = GetDefaultAppPackageName();
    233   }
    234 
    235   bool Run(const std::vector<std::string>& args);
    236 
    237  private:
    238   bool ParseOptions(const std::vector<std::string>& args,
    239                     std::vector<std::string>* non_option_args);
    240   bool PrepareRecording(Workload* workload);
    241   bool DoRecording(Workload* workload);
    242   bool PostProcessRecording(const std::vector<std::string>& args);
    243   bool TraceOffCpu();
    244   bool SetEventSelectionFlags();
    245   bool CreateAndInitRecordFile();
    246   std::unique_ptr<RecordFileWriter> CreateRecordFile(
    247       const std::string& filename);
    248   bool DumpKernelSymbol();
    249   bool DumpTracingData();
    250   bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
    251   bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
    252   bool ProcessRecord(Record* record);
    253   bool SaveRecordForPostUnwinding(Record* record);
    254   bool SaveRecordAfterUnwinding(Record* record);
    255   bool SaveRecordWithoutUnwinding(Record* record);
    256 
    257   void UpdateRecordForEmbeddedElfPath(Record* record);
    258   bool UnwindRecord(SampleRecord& r);
    259   bool PostUnwindRecords();
    260   bool JoinCallChains();
    261   bool DumpAdditionalFeatures(const std::vector<std::string>& args);
    262   bool DumpBuildIdFeature();
    263   bool DumpFileFeature();
    264   bool DumpMetaInfoFeature();
    265   void CollectHitFileInfo(const SampleRecord& r);
    266 
    267   std::unique_ptr<SampleSpeed> sample_speed_;
    268   bool system_wide_collection_;
    269   uint64_t branch_sampling_;
    270   bool fp_callchain_sampling_;
    271   bool dwarf_callchain_sampling_;
    272   uint32_t dump_stack_size_in_dwarf_sampling_;
    273   bool unwind_dwarf_callchain_;
    274   bool post_unwind_;
    275   std::unique_ptr<OfflineUnwinder> offline_unwinder_;
    276   bool child_inherit_;
    277   double duration_in_sec_;
    278   bool can_dump_kernel_symbols_;
    279   bool dump_symbols_;
    280   std::string clockid_;
    281   std::vector<int> cpus_;
    282   EventSelectionSet event_selection_set_;
    283 
    284   std::pair<size_t, size_t> mmap_page_range_;
    285 
    286   ThreadTree thread_tree_;
    287   std::string record_filename_;
    288   std::unique_ptr<RecordFileWriter> record_file_writer_;
    289 
    290   uint64_t start_sampling_time_in_ns_;  // nanoseconds from machine starting
    291 
    292   uint64_t sample_record_count_;
    293   uint64_t lost_record_count_;
    294   int start_profiling_fd_;
    295   std::string app_package_name_;
    296   bool in_app_context_;
    297   bool trace_offcpu_;
    298   bool exclude_kernel_callchain_;
    299 
    300   // For CallChainJoiner
    301   bool allow_callchain_joiner_;
    302   size_t callchain_joiner_min_matching_nodes_;
    303   std::unique_ptr<CallChainJoiner> callchain_joiner_;
    304 };
    305 
    306 bool RecordCommand::Run(const std::vector<std::string>& args) {
    307   ScopedCurrentArch scoped_arch(GetMachineArch());
    308   if (!CheckPerfEventLimit()) {
    309     return false;
    310   }
    311   AllowMoreOpenedFiles();
    312 
    313   std::vector<std::string> workload_args;
    314   if (!ParseOptions(args, &workload_args)) {
    315     return false;
    316   }
    317   ScopedTempFiles scoped_temp_files(android::base::Dirname(record_filename_));
    318   if (!app_package_name_.empty() && !in_app_context_) {
    319     // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
    320     // it will be impossible when using --app. So don't switch to app's context when we are
    321     // root.
    322     if (!IsRoot()) {
    323       return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
    324                              record_filename_, true);
    325     }
    326   }
    327   std::unique_ptr<Workload> workload;
    328   if (!workload_args.empty()) {
    329     workload = Workload::CreateWorkload(workload_args);
    330     if (workload == nullptr) {
    331       return false;
    332     }
    333   }
    334   if (!PrepareRecording(workload.get())) {
    335     return false;
    336   }
    337   if (!DoRecording(workload.get())) {
    338     return false;
    339   }
    340   return PostProcessRecording(args);
    341 }
    342 
    343 bool RecordCommand::PrepareRecording(Workload* workload) {
    344   // 1. Prepare in other modules.
    345   if (!InitPerfClock()) {
    346     return false;
    347   }
    348   PrepareVdsoFile();
    349 
    350   // 2. Add default event type.
    351   if (event_selection_set_.empty()) {
    352     size_t group_id;
    353     if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
    354       return false;
    355     }
    356     if (sample_speed_) {
    357       event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    358     }
    359   }
    360 
    361   // 3. Process options before opening perf event files.
    362   exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
    363   if (trace_offcpu_ && !TraceOffCpu()) {
    364     return false;
    365   }
    366   if (!SetEventSelectionFlags()) {
    367     return false;
    368   }
    369   if (unwind_dwarf_callchain_) {
    370     offline_unwinder_.reset(new OfflineUnwinder(false));
    371   }
    372   if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    373     callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
    374                                                 callchain_joiner_min_matching_nodes_,
    375                                                 false));
    376   }
    377 
    378   // 4. Add monitored targets.
    379   bool need_to_check_targets = false;
    380   if (system_wide_collection_) {
    381     event_selection_set_.AddMonitoredThreads({-1});
    382   } else if (!event_selection_set_.HasMonitoredTarget()) {
    383     if (workload != nullptr) {
    384       event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
    385       event_selection_set_.SetEnableOnExec(true);
    386       if (event_selection_set_.HasInplaceSampler()) {
    387         // Start worker early, because the worker process has to setup inplace-sampler server
    388         // before we try to connect it.
    389         if (!workload->Start()) {
    390           return false;
    391         }
    392       }
    393     } else if (!app_package_name_.empty()) {
    394       // If app process is not created, wait for it. This allows simpleperf starts before
    395       // app process. In this way, we can have a better support of app start-up time profiling.
    396       std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
    397       event_selection_set_.AddMonitoredProcesses(pids);
    398       need_to_check_targets = true;
    399     } else {
    400       LOG(ERROR)
    401           << "No threads to monitor. Try `simpleperf help record` for help";
    402       return false;
    403     }
    404   } else {
    405     need_to_check_targets = true;
    406   }
    407 
    408   // 5. Open perf event files and create mapped buffers.
    409   if (!event_selection_set_.OpenEventFiles(cpus_)) {
    410     return false;
    411   }
    412   if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
    413                                            mmap_page_range_.second)) {
    414     return false;
    415   }
    416 
    417   // 6. Create perf.data.
    418   if (!CreateAndInitRecordFile()) {
    419     return false;
    420   }
    421 
    422   // 7. Add read/signal/periodic Events.
    423   auto callback =
    424       std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
    425   if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    426     return false;
    427   }
    428   if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    429     return false;
    430   }
    431   IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
    432   if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM},
    433                              [loop]() { return loop->ExitLoop(); })) {
    434     return false;
    435   }
    436 
    437   // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
    438   if (!SignalIsIgnored(SIGHUP)) {
    439     if (!loop->AddSignalEvent(SIGHUP, [loop]() { return loop->ExitLoop(); })) {
    440       return false;
    441     }
    442   }
    443 
    444   if (duration_in_sec_ != 0) {
    445     if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
    446                                 [loop]() { return loop->ExitLoop(); })) {
    447       return false;
    448     }
    449   }
    450   return true;
    451 }
    452 
    453 bool RecordCommand::DoRecording(Workload* workload) {
    454   // Write records in mapped buffers of perf_event_files to output file while workload is running.
    455   start_sampling_time_in_ns_ = GetPerfClock();
    456   LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns";
    457   if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
    458     return false;
    459   }
    460   if (start_profiling_fd_ != -1) {
    461     if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
    462       PLOG(ERROR) << "failed to write to start_profiling_fd_";
    463     }
    464     close(start_profiling_fd_);
    465   }
    466   if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
    467     return false;
    468   }
    469   if (!event_selection_set_.FinishReadMmapEventData()) {
    470     return false;
    471   }
    472   return true;
    473 }
    474 
    475 bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
    476   // 1. Post unwind dwarf callchain.
    477   if (unwind_dwarf_callchain_ && post_unwind_) {
    478     if (!PostUnwindRecords()) {
    479       return false;
    480     }
    481   }
    482 
    483   // 2. Optionally join Callchains.
    484   if (callchain_joiner_) {
    485     JoinCallChains();
    486   }
    487 
    488   // 3. Dump additional features, and close record file.
    489   if (!DumpAdditionalFeatures(args)) {
    490     return false;
    491   }
    492   if (!record_file_writer_->Close()) {
    493     return false;
    494   }
    495 
    496   // 4. Show brief record result.
    497   LOG(INFO) << "Samples recorded: " << sample_record_count_
    498             << ". Samples lost: " << lost_record_count_ << ".";
    499   if (sample_record_count_ + lost_record_count_ != 0) {
    500     double lost_percent = static_cast<double>(lost_record_count_) /
    501                           (lost_record_count_ + sample_record_count_);
    502     constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
    503     if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
    504       LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
    505                    << "consider increasing mmap_pages(-m), "
    506                    << "or decreasing sample frequency(-f), "
    507                    << "or increasing sample period(-c).";
    508     }
    509   }
    510   if (callchain_joiner_) {
    511     callchain_joiner_->DumpStat();
    512   }
    513   return true;
    514 }
    515 
    516 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
    517                                  std::vector<std::string>* non_option_args) {
    518   std::vector<size_t> wait_setting_speed_event_groups_;
    519   size_t i;
    520   for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
    521     if (args[i] == "-a") {
    522       system_wide_collection_ = true;
    523     } else if (args[i] == "--app") {
    524       if (!NextArgumentOrError(args, &i)) {
    525         return false;
    526       }
    527       app_package_name_ = args[i];
    528     } else if (args[i] == "-b") {
    529       branch_sampling_ = branch_sampling_type_map["any"];
    530     } else if (args[i] == "-c" || args[i] == "-f") {
    531       if (!NextArgumentOrError(args, &i)) {
    532         return false;
    533       }
    534       char* endptr;
    535       uint64_t value = strtoull(args[i].c_str(), &endptr, 0);
    536       if (*endptr != '\0' || value == 0) {
    537         LOG(ERROR) << "Invalid option for " << args[i-1] << ": '" << args[i] << "'";
    538         return false;
    539       }
    540       if (args[i-1] == "-c") {
    541         sample_speed_.reset(new SampleSpeed(0, value));
    542       } else {
    543         sample_speed_.reset(new SampleSpeed(value, 0));
    544       }
    545       for (auto group_id : wait_setting_speed_event_groups_) {
    546         event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    547       }
    548       wait_setting_speed_event_groups_.clear();
    549 
    550     } else if (args[i] == "--call-graph") {
    551       if (!NextArgumentOrError(args, &i)) {
    552         return false;
    553       }
    554       std::vector<std::string> strs = android::base::Split(args[i], ",");
    555       if (strs[0] == "fp") {
    556         fp_callchain_sampling_ = true;
    557         dwarf_callchain_sampling_ = false;
    558       } else if (strs[0] == "dwarf") {
    559         fp_callchain_sampling_ = false;
    560         dwarf_callchain_sampling_ = true;
    561         if (strs.size() > 1) {
    562           char* endptr;
    563           uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
    564           if (*endptr != '\0' || size > UINT_MAX) {
    565             LOG(ERROR) << "invalid dump stack size in --call-graph option: "
    566                        << strs[1];
    567             return false;
    568           }
    569           if ((size & 7) != 0) {
    570             LOG(ERROR) << "dump stack size " << size
    571                        << " is not 8-byte aligned.";
    572             return false;
    573           }
    574           if (size >= MAX_DUMP_STACK_SIZE) {
    575             LOG(ERROR) << "dump stack size " << size
    576                        << " is bigger than max allowed size "
    577                        << MAX_DUMP_STACK_SIZE << ".";
    578             return false;
    579           }
    580           dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
    581         }
    582       } else {
    583         LOG(ERROR) << "unexpected argument for --call-graph option: "
    584                    << args[i];
    585         return false;
    586       }
    587     } else if (args[i] == "--clockid") {
    588       if (!NextArgumentOrError(args, &i)) {
    589         return false;
    590       }
    591       if (args[i] != "perf") {
    592         if (!IsSettingClockIdSupported()) {
    593           LOG(ERROR) << "Setting clockid is not supported by the kernel.";
    594           return false;
    595         }
    596         if (clockid_map.find(args[i]) == clockid_map.end()) {
    597           LOG(ERROR) << "Invalid clockid: " << args[i];
    598           return false;
    599         }
    600       }
    601       clockid_ = args[i];
    602     } else if (args[i] == "--cpu") {
    603       if (!NextArgumentOrError(args, &i)) {
    604         return false;
    605       }
    606       cpus_ = GetCpusFromString(args[i]);
    607     } else if (args[i] == "--duration") {
    608       if (!NextArgumentOrError(args, &i)) {
    609         return false;
    610       }
    611       if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
    612                                       1e-9)) {
    613         LOG(ERROR) << "Invalid duration: " << args[i].c_str();
    614         return false;
    615       }
    616     } else if (args[i] == "-e") {
    617       if (!NextArgumentOrError(args, &i)) {
    618         return false;
    619       }
    620       std::vector<std::string> event_types = android::base::Split(args[i], ",");
    621       for (auto& event_type : event_types) {
    622         size_t group_id;
    623         if (!event_selection_set_.AddEventType(event_type, &group_id)) {
    624           return false;
    625         }
    626         if (sample_speed_) {
    627           event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    628         } else {
    629           wait_setting_speed_event_groups_.push_back(group_id);
    630         }
    631       }
    632     } else if (args[i] == "--exit-with-parent") {
    633       prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
    634     } else if (args[i] == "-g") {
    635       fp_callchain_sampling_ = false;
    636       dwarf_callchain_sampling_ = true;
    637     } else if (args[i] == "--group") {
    638       if (!NextArgumentOrError(args, &i)) {
    639         return false;
    640       }
    641       std::vector<std::string> event_types = android::base::Split(args[i], ",");
    642       size_t group_id;
    643       if (!event_selection_set_.AddEventGroup(event_types, &group_id)) {
    644         return false;
    645       }
    646       if (sample_speed_) {
    647         event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    648       } else {
    649         wait_setting_speed_event_groups_.push_back(group_id);
    650       }
    651     } else if (args[i] == "--in-app") {
    652       in_app_context_ = true;
    653     } else if (args[i] == "-j") {
    654       if (!NextArgumentOrError(args, &i)) {
    655         return false;
    656       }
    657       std::vector<std::string> branch_sampling_types =
    658           android::base::Split(args[i], ",");
    659       for (auto& type : branch_sampling_types) {
    660         auto it = branch_sampling_type_map.find(type);
    661         if (it == branch_sampling_type_map.end()) {
    662           LOG(ERROR) << "unrecognized branch sampling filter: " << type;
    663           return false;
    664         }
    665         branch_sampling_ |= it->second;
    666       }
    667     } else if (args[i] == "-m") {
    668       if (!NextArgumentOrError(args, &i)) {
    669         return false;
    670       }
    671       char* endptr;
    672       uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
    673       if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
    674         LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
    675         return false;
    676       }
    677       mmap_page_range_.first = mmap_page_range_.second = pages;
    678     } else if (args[i] == "--no-dump-kernel-symbols") {
    679       can_dump_kernel_symbols_ = false;
    680     } else if (args[i] == "--no-dump-symbols") {
    681       dump_symbols_ = false;
    682     } else if (args[i] == "--no-inherit") {
    683       child_inherit_ = false;
    684     } else if (args[i] == "--no-unwind") {
    685       unwind_dwarf_callchain_ = false;
    686     } else if (args[i] == "--no-callchain-joiner") {
    687       allow_callchain_joiner_ = false;
    688     } else if (args[i] == "--callchain-joiner-min-matching-nodes") {
    689       if (!NextArgumentOrError(args, &i)) {
    690         return false;
    691       }
    692       if (!android::base::ParseUint(args[i].c_str(), &callchain_joiner_min_matching_nodes_) ||
    693           callchain_joiner_min_matching_nodes_ < 1u) {
    694         LOG(ERROR) << "unexpected argument for " << args[i - 1] << " option";
    695         return false;
    696       }
    697     } else if (args[i] == "-o") {
    698       if (!NextArgumentOrError(args, &i)) {
    699         return false;
    700       }
    701       record_filename_ = args[i];
    702     } else if (args[i] == "-p") {
    703       if (!NextArgumentOrError(args, &i)) {
    704         return false;
    705       }
    706       std::set<pid_t> pids;
    707       if (!GetValidThreadsFromThreadString(args[i], &pids)) {
    708         return false;
    709       }
    710       event_selection_set_.AddMonitoredProcesses(pids);
    711     } else if (args[i] == "--no-post-unwind") {
    712       post_unwind_ = false;
    713     } else if (args[i] == "--start_profiling_fd") {
    714       if (!NextArgumentOrError(args, &i)) {
    715         return false;
    716       }
    717       if (!android::base::ParseInt(args[i].c_str(), &start_profiling_fd_, 0)) {
    718         LOG(ERROR) << "Invalid start_profiling_fd: " << args[i];
    719         return false;
    720       }
    721     } else if (args[i] == "--symfs") {
    722       if (!NextArgumentOrError(args, &i)) {
    723         return false;
    724       }
    725       if (!Dso::SetSymFsDir(args[i])) {
    726         return false;
    727       }
    728     } else if (args[i] == "-t") {
    729       if (!NextArgumentOrError(args, &i)) {
    730         return false;
    731       }
    732       std::set<pid_t> tids;
    733       if (!GetValidThreadsFromThreadString(args[i], &tids)) {
    734         return false;
    735       }
    736       event_selection_set_.AddMonitoredThreads(tids);
    737     } else if (args[i] == "--trace-offcpu") {
    738       trace_offcpu_ = true;
    739     } else if (args[i] == "--tracepoint-events") {
    740       if (!NextArgumentOrError(args, &i)) {
    741         return false;
    742       }
    743       if (!SetTracepointEventsFilePath(args[i])) {
    744         return false;
    745       }
    746     } else if (args[i] == "--") {
    747       i++;
    748       break;
    749     } else {
    750       ReportUnknownOption(args, i);
    751       return false;
    752     }
    753   }
    754 
    755   if (!dwarf_callchain_sampling_) {
    756     if (!unwind_dwarf_callchain_) {
    757       LOG(ERROR)
    758           << "--no-unwind is only used with `--call-graph dwarf` option.";
    759       return false;
    760     }
    761     unwind_dwarf_callchain_ = false;
    762   }
    763   if (post_unwind_) {
    764     if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
    765       post_unwind_ = false;
    766     }
    767   } else {
    768     if (!dwarf_callchain_sampling_) {
    769       LOG(ERROR)
    770           << "--no-post-unwind is only used with `--call-graph dwarf` option.";
    771       return false;
    772     }
    773     if (!unwind_dwarf_callchain_) {
    774       LOG(ERROR) << "--no-post-unwind can't be used with `--no-unwind` option.";
    775       return false;
    776     }
    777   }
    778 
    779   if (fp_callchain_sampling_) {
    780     if (GetBuildArch() == ARCH_ARM) {
    781       LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
    782                    << "consider using `-g` option or profiling on aarch64 architecture.";
    783     }
    784   }
    785 
    786   if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
    787     LOG(ERROR) << "Record system wide and existing processes/threads can't be "
    788                   "used at the same time.";
    789     return false;
    790   }
    791 
    792   if (system_wide_collection_ && !IsRoot()) {
    793     LOG(ERROR) << "System wide profiling needs root privilege.";
    794     return false;
    795   }
    796 
    797   if (dump_symbols_ && can_dump_kernel_symbols_) {
    798     // No need to dump kernel symbols as we will dump all required symbols.
    799     can_dump_kernel_symbols_ = false;
    800   }
    801 
    802   non_option_args->clear();
    803   for (; i < args.size(); ++i) {
    804     non_option_args->push_back(args[i]);
    805   }
    806   return true;
    807 }
    808 
    809 bool RecordCommand::TraceOffCpu() {
    810   if (FindEventTypeByName("sched:sched_switch") == nullptr) {
    811     LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
    812     return false;
    813   }
    814   for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
    815     if (event_type->name == "sched:sched_switch") {
    816       LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
    817       return false;
    818     }
    819   }
    820   if (!IsDumpingRegsForTracepointEventsSupported()) {
    821     LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
    822     return false;
    823   }
    824   return event_selection_set_.AddEventType("sched:sched_switch");
    825 }
    826 
    827 bool RecordCommand::SetEventSelectionFlags() {
    828   event_selection_set_.SampleIdAll();
    829   if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    830     return false;
    831   }
    832   if (fp_callchain_sampling_) {
    833     event_selection_set_.EnableFpCallChainSampling();
    834   } else if (dwarf_callchain_sampling_) {
    835     if (!event_selection_set_.EnableDwarfCallChainSampling(
    836             dump_stack_size_in_dwarf_sampling_)) {
    837       return false;
    838     }
    839   }
    840   event_selection_set_.SetInherit(child_inherit_);
    841   if (clockid_ != "perf") {
    842     event_selection_set_.SetClockId(clockid_map[clockid_]);
    843   }
    844   return true;
    845 }
    846 
    847 bool RecordCommand::CreateAndInitRecordFile() {
    848   record_file_writer_ = CreateRecordFile(record_filename_);
    849   if (record_file_writer_ == nullptr) {
    850     return false;
    851   }
    852   // Use first perf_event_attr and first event id to dump mmap and comm records.
    853   EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
    854   if (!DumpKernelSymbol()) {
    855     return false;
    856   }
    857   if (!DumpTracingData()) {
    858     return false;
    859   }
    860   if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) {
    861     return false;
    862   }
    863   if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) {
    864     return false;
    865   }
    866   return true;
    867 }
    868 
    869 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
    870     const std::string& filename) {
    871   std::unique_ptr<RecordFileWriter> writer =
    872       RecordFileWriter::CreateInstance(filename);
    873   if (writer == nullptr) {
    874     return nullptr;
    875   }
    876 
    877   if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
    878     return nullptr;
    879   }
    880   return writer;
    881 }
    882 
    883 bool RecordCommand::DumpKernelSymbol() {
    884   if (can_dump_kernel_symbols_) {
    885     std::string kallsyms;
    886     if (event_selection_set_.NeedKernelSymbol() &&
    887         CheckKernelSymbolAddresses()) {
    888       if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
    889         PLOG(ERROR) << "failed to read /proc/kallsyms";
    890         return false;
    891       }
    892       KernelSymbolRecord r(kallsyms);
    893       if (!ProcessRecord(&r)) {
    894         return false;
    895       }
    896     }
    897   }
    898   return true;
    899 }
    900 
    901 bool RecordCommand::DumpTracingData() {
    902   std::vector<const EventType*> tracepoint_event_types =
    903       event_selection_set_.GetTracepointEvents();
    904   if (tracepoint_event_types.empty() || !CanRecordRawData()) {
    905     return true;  // No need to dump tracing data, or can't do it.
    906   }
    907   std::vector<char> tracing_data;
    908   if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
    909     return false;
    910   }
    911   TracingDataRecord record(tracing_data);
    912   if (!ProcessRecord(&record)) {
    913     return false;
    914   }
    915   return true;
    916 }
    917 
    918 bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
    919                                              uint64_t event_id) {
    920   KernelMmap kernel_mmap;
    921   std::vector<KernelMmap> module_mmaps;
    922   GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
    923 
    924   MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
    925                          kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
    926   if (!ProcessRecord(&mmap_record)) {
    927     return false;
    928   }
    929   for (auto& module_mmap : module_mmaps) {
    930     MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr,
    931                            module_mmap.len, 0, module_mmap.filepath, event_id);
    932     if (!ProcessRecord(&mmap_record)) {
    933       return false;
    934     }
    935   }
    936   return true;
    937 }
    938 
    939 bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
    940                                            uint64_t event_id) {
    941   // Decide which processes and threads to dump.
    942   // For system_wide profiling, dump all threads.
    943   // For non system wide profiling, build dump_threads.
    944   bool all_threads = system_wide_collection_;
    945   std::set<pid_t> dump_threads = event_selection_set_.GetMonitoredThreads();
    946   for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) {
    947     std::vector<pid_t> tids = GetThreadsInProcess(pid);
    948     dump_threads.insert(tids.begin(), tids.end());
    949   }
    950 
    951   // Collect processes to dump.
    952   std::vector<pid_t> processes;
    953   if (all_threads) {
    954     processes = GetAllProcesses();
    955   } else {
    956     std::set<pid_t> process_set;
    957     for (const auto& tid : dump_threads) {
    958       pid_t pid;
    959       if (!GetProcessForThread(tid, &pid)) {
    960         continue;
    961       }
    962       process_set.insert(pid);
    963     }
    964     processes.insert(processes.end(), process_set.begin(), process_set.end());
    965   }
    966 
    967   // Dump each process and its threads.
    968   for (auto& pid : processes) {
    969     // Dump mmap records.
    970     std::vector<ThreadMmap> thread_mmaps;
    971     if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
    972       // The process may exit before we get its info.
    973       continue;
    974     }
    975     for (const auto& map : thread_mmaps) {
    976       if (map.executable == 0) {
    977         continue;  // No need to dump non-executable mmap info.
    978       }
    979       MmapRecord record(attr, false, pid, pid, map.start_addr, map.len,
    980                         map.pgoff, map.name, event_id);
    981       if (!ProcessRecord(&record)) {
    982         return false;
    983       }
    984     }
    985     // Dump process name.
    986     std::string name;
    987     if (GetThreadName(pid, &name)) {
    988       CommRecord record(attr, pid, pid, name, event_id, 0);
    989       if (!ProcessRecord(&record)) {
    990         return false;
    991       }
    992     }
    993     // Dump thread info.
    994     std::vector<pid_t> threads = GetThreadsInProcess(pid);
    995     for (const auto& tid : threads) {
    996       if (tid == pid) {
    997         continue;
    998       }
    999       if (all_threads || dump_threads.find(tid) != dump_threads.end()) {
   1000         ForkRecord fork_record(attr, pid, tid, pid, pid, event_id);
   1001         if (!ProcessRecord(&fork_record)) {
   1002           return false;
   1003         }
   1004         if (GetThreadName(tid, &name)) {
   1005           CommRecord comm_record(attr, pid, tid, name, event_id, 0);
   1006           if (!ProcessRecord(&comm_record)) {
   1007             return false;
   1008           }
   1009         }
   1010       }
   1011     }
   1012   }
   1013   return true;
   1014 }
   1015 
   1016 bool RecordCommand::ProcessRecord(Record* record) {
   1017   if (unwind_dwarf_callchain_) {
   1018     if (post_unwind_) {
   1019       return SaveRecordForPostUnwinding(record);
   1020     }
   1021     return SaveRecordAfterUnwinding(record);
   1022   }
   1023   return SaveRecordWithoutUnwinding(record);
   1024 }
   1025 
   1026 bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
   1027   if (record->type() == PERF_RECORD_SAMPLE) {
   1028     static_cast<SampleRecord*>(record)->RemoveInvalidStackData();
   1029   }
   1030   if (!record_file_writer_->WriteRecord(*record)) {
   1031     LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
   1032                << "--no-post-unwind option.";
   1033     return false;
   1034   }
   1035   return true;
   1036 }
   1037 
   1038 bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
   1039   if (record->type() == PERF_RECORD_SAMPLE) {
   1040     auto& r = *static_cast<SampleRecord*>(record);
   1041     // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
   1042     // to adjust callchains generated by dwarf unwinder.
   1043     r.AdjustCallChainGeneratedByKernel();
   1044     if (!UnwindRecord(r)) {
   1045       return false;
   1046     }
   1047     // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
   1048     // chain.
   1049     if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
   1050       // If current record contains no user callchain, skip it.
   1051       return true;
   1052     }
   1053     sample_record_count_++;
   1054   } else if (record->type() == PERF_RECORD_LOST) {
   1055     lost_record_count_ += static_cast<LostRecord*>(record)->lost;
   1056   } else {
   1057     UpdateRecordForEmbeddedElfPath(record);
   1058     thread_tree_.Update(*record);
   1059   }
   1060   return record_file_writer_->WriteRecord(*record);
   1061 }
   1062 
   1063 bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
   1064   if (record->type() == PERF_RECORD_SAMPLE) {
   1065     auto& r = *static_cast<SampleRecord*>(record);
   1066     if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
   1067       r.AdjustCallChainGeneratedByKernel();
   1068     }
   1069     if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
   1070       // If current record contains no user callchain, skip it.
   1071       return true;
   1072     }
   1073     sample_record_count_++;
   1074   } else if (record->type() == PERF_RECORD_LOST) {
   1075     lost_record_count_ += static_cast<LostRecord*>(record)->lost;
   1076   }
   1077   return record_file_writer_->WriteRecord(*record);
   1078 }
   1079 
   1080 template <class RecordType>
   1081 void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
   1082   RecordType& r = *record;
   1083   if (!r.InKernel() && r.data->pgoff != 0) {
   1084     // For the case of a shared library "foobar.so" embedded
   1085     // inside an APK, we rewrite the original MMAP from
   1086     // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
   1087     // so as to make the library name explicit. This update is
   1088     // done here (as part of the record operation) as opposed to
   1089     // on the host during the report, since we want to report
   1090     // the correct library name even if the the APK in question
   1091     // is not present on the host. The new offset W is
   1092     // calculated to be with respect to the start of foobar.so,
   1093     // not to the start of path.apk.
   1094     EmbeddedElf* ee =
   1095         ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff);
   1096     if (ee != nullptr) {
   1097       // Compute new offset relative to start of elf in APK.
   1098       auto data = *r.data;
   1099       data.pgoff -= ee->entry_offset();
   1100       r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name()));
   1101     }
   1102   }
   1103 }
   1104 
   1105 void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
   1106   if (record->type() == PERF_RECORD_MMAP) {
   1107     UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
   1108   } else if (record->type() == PERF_RECORD_MMAP2) {
   1109     UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
   1110   }
   1111 }
   1112 
   1113 bool RecordCommand::UnwindRecord(SampleRecord& r) {
   1114   if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
   1115       (r.sample_type & PERF_SAMPLE_REGS_USER) &&
   1116       (r.regs_user_data.reg_mask != 0) &&
   1117       (r.sample_type & PERF_SAMPLE_STACK_USER) &&
   1118       (r.GetValidStackSize() > 0)) {
   1119     ThreadEntry* thread =
   1120         thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
   1121     RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
   1122     std::vector<uint64_t> ips;
   1123     std::vector<uint64_t> sps;
   1124     if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
   1125                                             r.GetValidStackSize(), &ips, &sps)) {
   1126       return false;
   1127     }
   1128     r.ReplaceRegAndStackWithCallChain(ips);
   1129     if (callchain_joiner_) {
   1130       return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
   1131                                              CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
   1132     }
   1133   }
   1134   return true;
   1135 }
   1136 
   1137 bool RecordCommand::PostUnwindRecords() {
   1138   // 1. Move records from record_filename_ to a temporary file.
   1139   if (!record_file_writer_->Close()) {
   1140     return false;
   1141   }
   1142   record_file_writer_.reset();
   1143   std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
   1144   if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
   1145     return false;
   1146   }
   1147   std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
   1148   if (!reader) {
   1149     return false;
   1150   }
   1151 
   1152   // 2. Read records from the temporary file, and write unwound records back to record_filename_.
   1153   record_file_writer_ = CreateRecordFile(record_filename_);
   1154   if (!record_file_writer_) {
   1155     return false;
   1156   }
   1157   sample_record_count_ = 0;
   1158   lost_record_count_ = 0;
   1159   auto callback = [this](std::unique_ptr<Record> record) {
   1160     return SaveRecordAfterUnwinding(record.get());
   1161   };
   1162   return reader->ReadDataSection(callback, false);
   1163 }
   1164 
   1165 bool RecordCommand::JoinCallChains() {
   1166   // 1. Prepare joined callchains.
   1167   if (!callchain_joiner_->JoinCallChains()) {
   1168     return false;
   1169   }
   1170   // 2. Move records from record_filename_ to a temporary file.
   1171   if (!record_file_writer_->Close()) {
   1172     return false;
   1173   }
   1174   record_file_writer_.reset();
   1175   std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
   1176   if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
   1177     return false;
   1178   }
   1179 
   1180   // 3. Read records from the temporary file, and write record with joined call chains back
   1181   // to record_filename_.
   1182   std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
   1183   record_file_writer_ = CreateRecordFile(record_filename_);
   1184   if (!reader || !record_file_writer_) {
   1185     return false;
   1186   }
   1187 
   1188   auto record_callback = [&](std::unique_ptr<Record> r) {
   1189     if (r->type() != PERF_RECORD_SAMPLE) {
   1190       return record_file_writer_->WriteRecord(*r);
   1191     }
   1192     SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
   1193     if (!sr.HasUserCallChain()) {
   1194       return record_file_writer_->WriteRecord(sr);
   1195     }
   1196     pid_t pid;
   1197     pid_t tid;
   1198     CallChainJoiner::ChainType type;
   1199     std::vector<uint64_t> ips;
   1200     std::vector<uint64_t> sps;
   1201     if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
   1202       return false;
   1203     }
   1204     CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
   1205     CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
   1206     CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
   1207     sr.UpdateUserCallChain(ips);
   1208     return record_file_writer_->WriteRecord(sr);
   1209   };
   1210   return reader->ReadDataSection(record_callback, false);
   1211 }
   1212 
   1213 bool RecordCommand::DumpAdditionalFeatures(
   1214     const std::vector<std::string>& args) {
   1215   // Read data section of perf.data to collect hit file information.
   1216   thread_tree_.ClearThreadAndMap();
   1217   if (CheckKernelSymbolAddresses()) {
   1218     Dso::ReadKernelSymbolsFromProc();
   1219   }
   1220   auto callback = [&](const Record* r) {
   1221     thread_tree_.Update(*r);
   1222     if (r->type() == PERF_RECORD_SAMPLE) {
   1223       CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
   1224     }
   1225   };
   1226   if (!record_file_writer_->ReadDataSection(callback)) {
   1227     return false;
   1228   }
   1229 
   1230   size_t feature_count = 5;
   1231   if (branch_sampling_) {
   1232     feature_count++;
   1233   }
   1234   if (dump_symbols_) {
   1235     feature_count++;
   1236   }
   1237   if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
   1238     return false;
   1239   }
   1240   if (!DumpBuildIdFeature()) {
   1241     return false;
   1242   }
   1243   if (dump_symbols_ && !DumpFileFeature()) {
   1244     return false;
   1245   }
   1246   utsname uname_buf;
   1247   if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
   1248     PLOG(ERROR) << "uname() failed";
   1249     return false;
   1250   }
   1251   if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
   1252                                                uname_buf.release)) {
   1253     return false;
   1254   }
   1255   if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
   1256                                                uname_buf.machine)) {
   1257     return false;
   1258   }
   1259 
   1260   std::string exec_path = android::base::GetExecutablePath();
   1261   if (exec_path.empty()) exec_path = "simpleperf";
   1262   std::vector<std::string> cmdline;
   1263   cmdline.push_back(exec_path);
   1264   cmdline.push_back("record");
   1265   cmdline.insert(cmdline.end(), args.begin(), args.end());
   1266   if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
   1267     return false;
   1268   }
   1269   if (branch_sampling_ != 0 &&
   1270       !record_file_writer_->WriteBranchStackFeature()) {
   1271     return false;
   1272   }
   1273   if (!DumpMetaInfoFeature()) {
   1274     return false;
   1275   }
   1276 
   1277   if (!record_file_writer_->EndWriteFeatures()) {
   1278     return false;
   1279   }
   1280   return true;
   1281 }
   1282 
   1283 bool RecordCommand::DumpBuildIdFeature() {
   1284   std::vector<BuildIdRecord> build_id_records;
   1285   BuildId build_id;
   1286   std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
   1287   for (Dso* dso : dso_v) {
   1288     if (!dso->HasDumpId()) {
   1289       continue;
   1290     }
   1291     if (dso->type() == DSO_KERNEL) {
   1292       if (!GetKernelBuildId(&build_id)) {
   1293         continue;
   1294       }
   1295       build_id_records.push_back(
   1296           BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
   1297     } else if (dso->type() == DSO_KERNEL_MODULE) {
   1298       std::string path = dso->Path();
   1299       std::string module_name = basename(&path[0]);
   1300       if (android::base::EndsWith(module_name, ".ko")) {
   1301         module_name = module_name.substr(0, module_name.size() - 3);
   1302       }
   1303       if (!GetModuleBuildId(module_name, &build_id)) {
   1304         LOG(DEBUG) << "can't read build_id for module " << module_name;
   1305         continue;
   1306       }
   1307       build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
   1308     } else {
   1309       if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
   1310         continue;
   1311       }
   1312       auto tuple = SplitUrlInApk(dso->Path());
   1313       if (std::get<0>(tuple)) {
   1314         ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple),
   1315                                                  std::get<2>(tuple), &build_id);
   1316         if (result != ElfStatus::NO_ERROR) {
   1317           LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
   1318                      << result;
   1319           continue;
   1320         }
   1321       } else {
   1322         ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id);
   1323         if (result != ElfStatus::NO_ERROR) {
   1324           LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
   1325                      << result;
   1326           continue;
   1327         }
   1328       }
   1329       build_id_records.push_back(
   1330           BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
   1331     }
   1332   }
   1333   if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
   1334     return false;
   1335   }
   1336   return true;
   1337 }
   1338 
   1339 bool RecordCommand::DumpFileFeature() {
   1340   std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
   1341   return record_file_writer_->WriteFileFeatures(thread_tree_.GetAllDsos());
   1342 }
   1343 
   1344 bool RecordCommand::DumpMetaInfoFeature() {
   1345   std::unordered_map<std::string, std::string> info_map;
   1346   info_map["simpleperf_version"] = GetSimpleperfVersion();
   1347   info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
   1348   info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
   1349   // By storing event types information in perf.data, the readers of perf.data have the same
   1350   // understanding of event types, even if they are on another machine.
   1351   info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
   1352 #if defined(__ANDROID__)
   1353   info_map["product_props"] = android::base::StringPrintf("%s:%s:%s",
   1354                                   android::base::GetProperty("ro.product.manufacturer", "").c_str(),
   1355                                   android::base::GetProperty("ro.product.model", "").c_str(),
   1356                                   android::base::GetProperty("ro.product.name", "").c_str());
   1357   info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
   1358 #endif
   1359   info_map["clockid"] = clockid_;
   1360   info_map["timestamp"] = std::to_string(time(nullptr));
   1361   return record_file_writer_->WriteMetaInfoFeature(info_map);
   1362 }
   1363 
   1364 void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
   1365   const ThreadEntry* thread =
   1366       thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
   1367   const MapEntry* map =
   1368       thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
   1369   Dso* dso = map->dso;
   1370   const Symbol* symbol;
   1371   if (dump_symbols_) {
   1372     symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
   1373     if (!symbol->HasDumpId()) {
   1374       dso->CreateSymbolDumpId(symbol);
   1375     }
   1376   }
   1377   if (!dso->HasDumpId()) {
   1378     dso->CreateDumpId();
   1379   }
   1380   if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
   1381     bool in_kernel = r.InKernel();
   1382     bool first_ip = true;
   1383     for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
   1384       uint64_t ip = r.callchain_data.ips[i];
   1385       if (ip >= PERF_CONTEXT_MAX) {
   1386         switch (ip) {
   1387           case PERF_CONTEXT_KERNEL:
   1388             in_kernel = true;
   1389             break;
   1390           case PERF_CONTEXT_USER:
   1391             in_kernel = false;
   1392             break;
   1393           default:
   1394             LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
   1395                        << ip;
   1396         }
   1397       } else {
   1398         if (first_ip) {
   1399           first_ip = false;
   1400           // Remove duplication with sample ip.
   1401           if (ip == r.ip_data.ip) {
   1402             continue;
   1403           }
   1404         }
   1405         map = thread_tree_.FindMap(thread, ip, in_kernel);
   1406         dso = map->dso;
   1407         if (dump_symbols_) {
   1408           symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
   1409           if (!symbol->HasDumpId()) {
   1410             dso->CreateSymbolDumpId(symbol);
   1411           }
   1412         }
   1413         if (!dso->HasDumpId()) {
   1414           dso->CreateDumpId();
   1415         }
   1416       }
   1417     }
   1418   }
   1419 }
   1420 
   1421 void RegisterRecordCommand() {
   1422   RegisterCommand("record",
   1423                   [] { return std::unique_ptr<Command>(new RecordCommand()); });
   1424 }
   1425