Home | History | Annotate | Download | only in simpleperf
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "command.h"
     18 
     19 #include <unordered_map>
     20 
     21 #include <android-base/logging.h>
     22 #include <android-base/strings.h>
     23 
     24 #include "callchain.h"
     25 #include "event_attr.h"
     26 #include "event_type.h"
     27 #include "record_file.h"
     28 #include "sample_tree.h"
     29 #include "tracing.h"
     30 #include "utils.h"
     31 
     32 namespace {
     33 
     34 struct SlabSample {
     35   const Symbol* symbol;            // the function making allocation
     36   uint64_t ptr;                    // the start address of the allocated space
     37   uint64_t bytes_req;              // requested space size
     38   uint64_t bytes_alloc;            // allocated space size
     39   uint64_t sample_count;           // count of allocations
     40   uint64_t gfp_flags;              // flags used for allocation
     41   uint64_t cross_cpu_allocations;  // count of allocations freed not on the
     42                                    // cpu allocating them
     43   CallChainRoot<SlabSample> callchain;  // a callchain tree representing all
     44                                         // callchains in this sample
     45   SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req,
     46              uint64_t bytes_alloc, uint64_t sample_count, uint64_t gfp_flags,
     47              uint64_t cross_cpu_allocations)
     48       : symbol(symbol),
     49         ptr(ptr),
     50         bytes_req(bytes_req),
     51         bytes_alloc(bytes_alloc),
     52         sample_count(sample_count),
     53         gfp_flags(gfp_flags),
     54         cross_cpu_allocations(cross_cpu_allocations) {}
     55 
     56   uint64_t GetPeriod() const {
     57     return sample_count;
     58   }
     59 };
     60 
     61 struct SlabAccumulateInfo {
     62   uint64_t bytes_req;
     63   uint64_t bytes_alloc;
     64 };
     65 
// Comparison helpers for SlabSample members. The first macro argument is the
// name of the helper (these names are registered as compare functions in
// KmemCommand::PrepareToBuildSampleTree), the second is the SlabSample member
// it looks at. The macros themselves are defined outside this file.
// NOTE(review): the _REVERSE variants presumably order larger values first --
// confirm against the macro definitions.
BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);
BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations,
                                     cross_cpu_allocations);

// Display helpers for the same members, registered as display functions in
// KmemCommand::PrepareToBuildSampleTree. Judging by the macro names, ptr and
// gfp_flags are printed as hex and the counters as decimal -- the exact
// format is set by the macro definitions, which are not in this file.
BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);
BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations,
                              cross_cpu_allocations);
     79 
     80 static int CompareFragment(const SlabSample* sample1,
     81                            const SlabSample* sample2) {
     82   uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req;
     83   uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req;
     84   return Compare(frag2, frag1);
     85 }
     86 
     87 static std::string DisplayFragment(const SlabSample* sample) {
     88   return android::base::StringPrintf("%" PRIu64,
     89                                      sample->bytes_alloc - sample->bytes_req);
     90 }
     91 
     92 struct SlabSampleTree {
     93   std::vector<SlabSample*> samples;
     94   uint64_t total_requested_bytes;
     95   uint64_t total_allocated_bytes;
     96   uint64_t nr_allocations;
     97   uint64_t nr_frees;
     98   uint64_t nr_cross_cpu_allocations;
     99 };
    100 
// Describes how to decode the raw data of one kmem tracepoint event.
// KmemCommand::ProcessTracingData() fills the field places from the tracing
// format, and SlabSampleTreeBuilder::CreateSample() reads the values through
// TracingFieldPlace::ReadFromData().
struct SlabFormat {
  // Whether the event reports an allocation or a free.
  enum {
    KMEM_ALLOC,
    KMEM_FREE,
  } type;
  // Places of the tracepoint fields within the raw data. For KMEM_FREE events
  // only call_site and ptr are filled in by ProcessTracingData().
  TracingFieldPlace call_site;
  TracingFieldPlace ptr;
  TracingFieldPlace bytes_req;
  TracingFieldPlace bytes_alloc;
  TracingFieldPlace gfp_flags;
};
    112 
    113 class SlabSampleTreeBuilder
    114     : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
    115  public:
    116   SlabSampleTreeBuilder(const SampleComparator<SlabSample>& sample_comparator,
    117                         ThreadTree* thread_tree)
    118       : SampleTreeBuilder(sample_comparator),
    119         thread_tree_(thread_tree),
    120         total_requested_bytes_(0),
    121         total_allocated_bytes_(0),
    122         nr_allocations_(0),
    123         nr_cross_cpu_allocations_(0) {}
    124 
    125   SlabSampleTree GetSampleTree() const {
    126     SlabSampleTree sample_tree;
    127     sample_tree.samples = GetSamples();
    128     sample_tree.total_requested_bytes = total_requested_bytes_;
    129     sample_tree.total_allocated_bytes = total_allocated_bytes_;
    130     sample_tree.nr_allocations = nr_allocations_;
    131     sample_tree.nr_frees = nr_frees_;
    132     sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
    133     return sample_tree;
    134   }
    135 
    136   void AddSlabFormat(const std::vector<uint64_t>& event_ids,
    137                      SlabFormat format) {
    138     std::unique_ptr<SlabFormat> p(new SlabFormat(format));
    139     for (auto id : event_ids) {
    140       event_id_to_format_map_[id] = p.get();
    141     }
    142     formats_.push_back(std::move(p));
    143   }
    144 
    145  protected:
    146   SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
    147                            SlabAccumulateInfo* acc_info) override {
    148     if (!in_kernel) {
    149       // Normally we don't parse records in user space because tracepoint
    150       // events all happen in kernel. But if r.ip_data.ip == 0, it may be
    151       // a kernel record failed to dump ip register and is still useful.
    152       if (r.ip_data.ip == 0) {
    153         // It seems we are on a kernel can't dump regset for tracepoint events
    154         // because of lacking perf_arch_fetch_caller_regs(). We can't get
    155         // callchain, but we can still do a normal report.
    156         static bool first = true;
    157         if (first) {
    158           first = false;
    159           if (accumulate_callchain_) {
    160             // The kernel doesn't seem to support dumping registers for
    161             // tracepoint events because of lacking
    162             // perf_arch_fetch_caller_regs().
    163             LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
    164                          << " events because of lacking kernel support.";
    165           }
    166         }
    167       } else {
    168         return nullptr;
    169       }
    170     }
    171     uint64_t id = r.id_data.id;
    172     auto it = event_id_to_format_map_.find(id);
    173     if (it == event_id_to_format_map_.end()) {
    174       return nullptr;
    175     }
    176     const char* raw_data = r.raw_data.data;
    177     SlabFormat* format = it->second;
    178     if (format->type == SlabFormat::KMEM_ALLOC) {
    179       uint64_t call_site = format->call_site.ReadFromData(raw_data);
    180       const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
    181       uint64_t ptr = format->ptr.ReadFromData(raw_data);
    182       uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
    183       uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
    184       uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
    185       SlabSample* sample =
    186           InsertSample(std::unique_ptr<SlabSample>(new SlabSample(
    187               symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
    188       alloc_cpu_record_map_.insert(
    189           std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample)));
    190       acc_info->bytes_req = bytes_req;
    191       acc_info->bytes_alloc = bytes_alloc;
    192       return sample;
    193     } else if (format->type == SlabFormat::KMEM_FREE) {
    194       uint64_t ptr = format->ptr.ReadFromData(raw_data);
    195       auto it = alloc_cpu_record_map_.find(ptr);
    196       if (it != alloc_cpu_record_map_.end()) {
    197         SlabSample* sample = it->second.second;
    198         if (r.cpu_data.cpu != it->second.first) {
    199           sample->cross_cpu_allocations++;
    200           nr_cross_cpu_allocations_++;
    201         }
    202         alloc_cpu_record_map_.erase(it);
    203       }
    204       nr_frees_++;
    205     }
    206     return nullptr;
    207   }
    208 
    209   SlabSample* CreateBranchSample(const SampleRecord&,
    210                                  const BranchStackItemType&) override {
    211     return nullptr;
    212   }
    213 
    214   SlabSample* CreateCallChainSample(
    215       const SlabSample* sample, uint64_t ip, bool in_kernel,
    216       const std::vector<SlabSample*>& callchain,
    217       const SlabAccumulateInfo& acc_info) override {
    218     if (!in_kernel) {
    219       return nullptr;
    220     }
    221     const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
    222     return InsertCallChainSample(
    223         std::unique_ptr<SlabSample>(
    224             new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
    225                            acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
    226         callchain);
    227   }
    228 
    229   const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }
    230 
    231   uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override {
    232     // Decide the percentage of callchain by the sample_count, so use 1 as the
    233     // period when calling AddCallChain().
    234     return 1;
    235   }
    236 
    237   void UpdateSummary(const SlabSample* sample) override {
    238     total_requested_bytes_ += sample->bytes_req;
    239     total_allocated_bytes_ += sample->bytes_alloc;
    240     nr_allocations_++;
    241   }
    242 
    243   void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
    244     sample1->bytes_req += sample2->bytes_req;
    245     sample1->bytes_alloc += sample2->bytes_alloc;
    246     sample1->sample_count += sample2->sample_count;
    247   }
    248 
    249  private:
    250   ThreadTree* thread_tree_;
    251   uint64_t total_requested_bytes_;
    252   uint64_t total_allocated_bytes_;
    253   uint64_t nr_allocations_;
    254   uint64_t nr_frees_;
    255   uint64_t nr_cross_cpu_allocations_;
    256 
    257   std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
    258   std::vector<std::unique_ptr<SlabFormat>> formats_;
    259   std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>>
    260       alloc_cpu_record_map_;
    261 };
    262 
// Instantiations of the generic sample tree helpers for slab samples.
using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;
using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;
using SlabSampleCallgraphDisplayer =
    CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>;
    267 
// An event attr read from the record file, together with its display name
// and the event ids that belong to it.
struct EventAttrWithName {
  perf_event_attr attr;
  // Set from GetEventNameByAttr(); replaced for tracepoint events once
  // tracing data is available (see KmemCommand::ProcessTracingData).
  std::string name;
  std::vector<uint64_t> event_ids;
};
    273 
// The `kmem` command: `kmem record` collects kernel memory allocation
// tracepoint events by delegating to the record command, and `kmem report`
// prints slab allocation information from a record file.
class KmemCommand : public Command {
 public:
  KmemCommand()
      : Command(
            "kmem", "collect kernel memory allocation information",
            // clang-format off
"Usage: kmem (record [record options] | report [report options])\n"
"kmem record\n"
"-g        Enable call graph recording. Same as '--call-graph fp'.\n"
"--slab    Collect slab allocation information. Default option.\n"
"Other record options provided by simpleperf record command are also available.\n"
"kmem report\n"
"--children  Print the accumulated allocation info appeared in the callchain.\n"
"            Can be used on perf.data recorded with `--call-graph fp` option.\n"
"-g [callee|caller]  Print call graph for perf.data recorded with\n"
"                    `--call-graph fp` option. If callee mode is used, the graph\n"
"                     shows how functions are called from others. Otherwise, the\n"
"                     graph shows how functions call others. Default is callee\n"
"                     mode. The percentage shown in the graph is determined by\n"
"                     the hit count of the callchain.\n"
"-i          Specify path of record file, default is perf.data\n"
"-o report_file_name  Set report file name, default is stdout.\n"
"--slab      Report slab allocation information. Default option.\n"
"--slab-sort key1,key2,...\n"
"            Select the keys to sort and print slab allocation information.\n"
"            Should be used with --slab option. Possible keys include:\n"
"              hit         -- the allocation count.\n"
"              caller      -- the function calling allocation.\n"
"              ptr         -- the address of the allocated space.\n"
"              bytes_req   -- the total requested space size.\n"
"              bytes_alloc -- the total allocated space size.\n"
"              fragment    -- the extra allocated space size\n"
"                             (bytes_alloc - bytes_req).\n"
"              gfp_flags   -- the flags used for allocation.\n"
"              pingpong    -- the count of allocations that are freed not on\n"
"                             the cpu allocating them.\n"
"            The default slab sort keys are:\n"
"              hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
            // clang-format on
            ),
        is_record_(false),
        use_slab_(false),
        accumulate_callchain_(false),
        print_callgraph_(false),
        callgraph_show_callee_(false),
        record_filename_("perf.data"),
        record_file_arch_(GetBuildArch()) {}

  // Parses args and runs either the record or the report subcommand.
  // Returns false on any failure.
  bool Run(const std::vector<std::string>& args);

 private:
  // Parses the subcommand and its options. For `record`, the arguments to be
  // forwarded to the record command are stored in left_args.
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* left_args);
  bool RecordKmemInfo(const std::vector<std::string>& record_args);
  bool ReportKmemInfo();
  bool PrepareToBuildSampleTree();
  void ReadEventAttrsFromRecordFile();
  bool ReadFeaturesFromRecordFile();
  bool ReadSampleTreeFromRecordFile();
  bool ProcessRecord(std::unique_ptr<Record> record);
  void ProcessTracingData(const std::vector<char>& data);
  bool PrintReport();
  void PrintReportContext(FILE* fp);
  void PrintSlabReportContext(FILE* fp);

  bool is_record_;  // true for `kmem record`, false for `kmem report`
  bool use_slab_;   // set by --slab; Run() forces it to true
  std::vector<std::string> slab_sort_keys_;  // from --slab-sort
  bool accumulate_callchain_;   // set by --children and by -g
  bool print_callgraph_;        // set by -g
  bool callgraph_show_callee_;  // -g mode; false only for `-g caller`

  std::string record_filename_;  // from -i, "perf.data" by default
  std::unique_ptr<RecordFileReader> record_file_reader_;
  std::vector<EventAttrWithName> event_attrs_;
  std::string record_cmdline_;  // cmdline feature of the record file, joined
  ArchType record_file_arch_;   // arch feature of the record file, or the
                                // build arch when the feature is absent

  ThreadTree thread_tree_;
  SlabSampleTree slab_sample_tree_;
  std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
  std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
  std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;

  std::string report_filename_;  // from -o; empty means stdout
};
    360 
    361 bool KmemCommand::Run(const std::vector<std::string>& args) {
    362   std::vector<std::string> left_args;
    363   if (!ParseOptions(args, &left_args)) {
    364     return false;
    365   }
    366   if (!use_slab_) {
    367     use_slab_ = true;
    368   }
    369   if (is_record_) {
    370     return RecordKmemInfo(left_args);
    371   }
    372   return ReportKmemInfo();
    373 }
    374 
    375 bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
    376                                std::vector<std::string>* left_args) {
    377   if (args.empty()) {
    378     LOG(ERROR) << "No subcommand specified";
    379     return false;
    380   }
    381   if (args[0] == "record") {
    382     if (!IsRoot()) {
    383       LOG(ERROR) << "simpleperf kmem record command needs root privilege";
    384       return false;
    385     }
    386     is_record_ = true;
    387     size_t i;
    388     for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
    389       if (args[i] == "-g") {
    390         left_args->push_back("--call-graph");
    391         left_args->push_back("fp");
    392       } else if (args[i] == "--slab") {
    393         use_slab_ = true;
    394       } else {
    395         left_args->push_back(args[i]);
    396       }
    397     }
    398     left_args->insert(left_args->end(), args.begin() + i, args.end());
    399   } else if (args[0] == "report") {
    400     is_record_ = false;
    401     for (size_t i = 1; i < args.size(); ++i) {
    402       if (args[i] == "--children") {
    403         accumulate_callchain_ = true;
    404       } else if (args[i] == "-g") {
    405         print_callgraph_ = true;
    406         accumulate_callchain_ = true;
    407         callgraph_show_callee_ = true;
    408         if (i + 1 < args.size() && args[i + 1][0] != '-') {
    409           ++i;
    410           if (args[i] == "callee") {
    411             callgraph_show_callee_ = true;
    412           } else if (args[i] == "caller") {
    413             callgraph_show_callee_ = false;
    414           } else {
    415             LOG(ERROR) << "Unknown argument with -g option: " << args[i];
    416             return false;
    417           }
    418         }
    419       } else if (args[i] == "-i") {
    420         if (!NextArgumentOrError(args, &i)) {
    421           return false;
    422         }
    423         record_filename_ = args[i];
    424       } else if (args[i] == "-o") {
    425         if (!NextArgumentOrError(args, &i)) {
    426           return false;
    427         }
    428         report_filename_ = args[i];
    429       } else if (args[i] == "--slab") {
    430         use_slab_ = true;
    431       } else if (args[i] == "--slab-sort") {
    432         if (!NextArgumentOrError(args, &i)) {
    433           return false;
    434         }
    435         slab_sort_keys_ = android::base::Split(args[i], ",");
    436       } else {
    437         ReportUnknownOption(args, i);
    438         return false;
    439       }
    440     }
    441   } else {
    442     LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0]
    443                << ". Try `simpleperf help " << Name() << "`";
    444     return false;
    445   }
    446   return true;
    447 }
    448 
    449 bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
    450   std::vector<std::string> args;
    451   if (use_slab_) {
    452     std::vector<std::string> trace_events = {
    453         "kmem:kmalloc",      "kmem:kmem_cache_alloc",
    454         "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
    455         "kmem:kfree",        "kmem:kmem_cache_free"};
    456     for (const auto& name : trace_events) {
    457       if (ParseEventType(name)) {
    458         args.insert(args.end(), {"-e", name});
    459       }
    460     }
    461   }
    462   if (args.empty()) {
    463     LOG(ERROR) << "Kernel allocation related trace events are not supported.";
    464     return false;
    465   }
    466   args.push_back("-a");
    467   args.insert(args.end(), record_args.begin(), record_args.end());
    468   std::unique_ptr<Command> record_cmd = CreateCommandInstance("record");
    469   if (record_cmd == nullptr) {
    470     LOG(ERROR) << "record command isn't available";
    471     return false;
    472   }
    473   return record_cmd->Run(args);
    474 }
    475 
    476 bool KmemCommand::ReportKmemInfo() {
    477   if (!PrepareToBuildSampleTree()) {
    478     return false;
    479   }
    480   record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
    481   if (record_file_reader_ == nullptr) {
    482     return false;
    483   }
    484   ReadEventAttrsFromRecordFile();
    485   if (!ReadFeaturesFromRecordFile()) {
    486     return false;
    487   }
    488   if (!ReadSampleTreeFromRecordFile()) {
    489     return false;
    490   }
    491   if (!PrintReport()) {
    492     return false;
    493   }
    494   return true;
    495 }
    496 
    497 bool KmemCommand::PrepareToBuildSampleTree() {
    498   if (use_slab_) {
    499     if (slab_sort_keys_.empty()) {
    500       slab_sort_keys_ = {"hit",         "caller",   "bytes_req",
    501                          "bytes_alloc", "fragment", "pingpong"};
    502     }
    503     SampleComparator<SlabSample> comparator;
    504     SampleComparator<SlabSample> sort_comparator;
    505     SampleDisplayer<SlabSample, SlabSampleTree> displayer;
    506     std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";
    507 
    508     if (print_callgraph_) {
    509       displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer());
    510     }
    511 
    512     for (const auto& key : slab_sort_keys_) {
    513       if (key == "hit") {
    514         sort_comparator.AddCompareFunction(CompareSampleCount);
    515         displayer.AddDisplayFunction(accumulated_name + "Hit",
    516                                      DisplaySampleCount);
    517       } else if (key == "caller") {
    518         comparator.AddCompareFunction(CompareSymbol);
    519         displayer.AddDisplayFunction("Caller", DisplaySymbol);
    520       } else if (key == "ptr") {
    521         comparator.AddCompareFunction(ComparePtr);
    522         displayer.AddDisplayFunction("Ptr", DisplayPtr);
    523       } else if (key == "bytes_req") {
    524         sort_comparator.AddCompareFunction(CompareBytesReq);
    525         displayer.AddDisplayFunction(accumulated_name + "BytesReq",
    526                                      DisplayBytesReq);
    527       } else if (key == "bytes_alloc") {
    528         sort_comparator.AddCompareFunction(CompareBytesAlloc);
    529         displayer.AddDisplayFunction(accumulated_name + "BytesAlloc",
    530                                      DisplayBytesAlloc);
    531       } else if (key == "fragment") {
    532         sort_comparator.AddCompareFunction(CompareFragment);
    533         displayer.AddDisplayFunction(accumulated_name + "Fragment",
    534                                      DisplayFragment);
    535       } else if (key == "gfp_flags") {
    536         comparator.AddCompareFunction(CompareGfpFlags);
    537         displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
    538       } else if (key == "pingpong") {
    539         sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
    540         displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
    541       } else {
    542         LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
    543         return false;
    544       }
    545       slab_sample_tree_builder_.reset(
    546           new SlabSampleTreeBuilder(comparator, &thread_tree_));
    547       slab_sample_tree_builder_->SetCallChainSampleOptions(
    548           accumulate_callchain_, print_callgraph_, !callgraph_show_callee_);
    549       sort_comparator.AddComparator(comparator);
    550       slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
    551       slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
    552     }
    553   }
    554   return true;
    555 }
    556 
    557 void KmemCommand::ReadEventAttrsFromRecordFile() {
    558   std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
    559   for (const auto& attr_with_id : attrs) {
    560     EventAttrWithName attr;
    561     attr.attr = *attr_with_id.attr;
    562     attr.event_ids = attr_with_id.ids;
    563     attr.name = GetEventNameByAttr(attr.attr);
    564     event_attrs_.push_back(attr);
    565   }
    566 }
    567 
    568 bool KmemCommand::ReadFeaturesFromRecordFile() {
    569   record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
    570   std::string arch =
    571       record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
    572   if (!arch.empty()) {
    573     record_file_arch_ = GetArchType(arch);
    574     if (record_file_arch_ == ARCH_UNSUPPORTED) {
    575       return false;
    576     }
    577   }
    578   std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
    579   if (!cmdline.empty()) {
    580     record_cmdline_ = android::base::Join(cmdline, ' ');
    581   }
    582   if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
    583     std::vector<char> tracing_data;
    584     if (!record_file_reader_->ReadFeatureSection(
    585             PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) {
    586       return false;
    587     }
    588     ProcessTracingData(tracing_data);
    589   }
    590   return true;
    591 }
    592 
    593 bool KmemCommand::ReadSampleTreeFromRecordFile() {
    594   if (!record_file_reader_->ReadDataSection(
    595           [this](std::unique_ptr<Record> record) {
    596             return ProcessRecord(std::move(record));
    597           })) {
    598     return false;
    599   }
    600   if (use_slab_) {
    601     slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
    602     slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
    603   }
    604   return true;
    605 }
    606 
    607 bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
    608   thread_tree_.Update(*record);
    609   if (record->type() == PERF_RECORD_SAMPLE) {
    610     if (use_slab_) {
    611       slab_sample_tree_builder_->ProcessSampleRecord(
    612           *static_cast<const SampleRecord*>(record.get()));
    613     }
    614   } else if (record->type() == PERF_RECORD_TRACING_DATA) {
    615     const auto& r = *static_cast<TracingDataRecord*>(record.get());
    616     ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size));
    617   }
    618   return true;
    619 }
    620 
    621 void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
    622   Tracing tracing(data);
    623   for (auto& attr : event_attrs_) {
    624     if (attr.attr.type == PERF_TYPE_TRACEPOINT) {
    625       uint64_t trace_event_id = attr.attr.config;
    626       attr.name = tracing.GetTracingEventNameHavingId(trace_event_id);
    627       TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
    628       if (use_slab_) {
    629         if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" ||
    630             format.name == "kmalloc_node" ||
    631             format.name == "kmem_cache_alloc_node") {
    632           SlabFormat f;
    633           f.type = SlabFormat::KMEM_ALLOC;
    634           format.GetField("call_site", f.call_site);
    635           format.GetField("ptr", f.ptr);
    636           format.GetField("bytes_req", f.bytes_req);
    637           format.GetField("bytes_alloc", f.bytes_alloc);
    638           format.GetField("gfp_flags", f.gfp_flags);
    639           slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
    640         } else if (format.name == "kfree" || format.name == "kmem_cache_free") {
    641           SlabFormat f;
    642           f.type = SlabFormat::KMEM_FREE;
    643           format.GetField("call_site", f.call_site);
    644           format.GetField("ptr", f.ptr);
    645           slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
    646         }
    647       }
    648     }
    649   }
    650 }
    651 
    652 bool KmemCommand::PrintReport() {
    653   std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose);
    654   FILE* report_fp = stdout;
    655   if (!report_filename_.empty()) {
    656     file_handler.reset(fopen(report_filename_.c_str(), "w"));
    657     if (file_handler == nullptr) {
    658       PLOG(ERROR) << "failed to open " << report_filename_;
    659       return false;
    660     }
    661     report_fp = file_handler.get();
    662   }
    663   PrintReportContext(report_fp);
    664   if (use_slab_) {
    665     fprintf(report_fp, "\n\n");
    666     PrintSlabReportContext(report_fp);
    667     slab_sample_tree_displayer_->DisplaySamples(
    668         report_fp, slab_sample_tree_.samples, &slab_sample_tree_);
    669   }
    670   return true;
    671 }
    672 
    673 void KmemCommand::PrintReportContext(FILE* fp) {
    674   if (!record_cmdline_.empty()) {
    675     fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
    676   }
    677   fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str());
    678   for (const auto& attr : event_attrs_) {
    679     fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(),
    680             attr.attr.type, attr.attr.config);
    681   }
    682 }
    683 
    684 void KmemCommand::PrintSlabReportContext(FILE* fp) {
    685   fprintf(fp, "Slab allocation information:\n");
    686   fprintf(fp, "Total requested bytes: %" PRIu64 "\n",
    687           slab_sample_tree_.total_requested_bytes);
    688   fprintf(fp, "Total allocated bytes: %" PRIu64 "\n",
    689           slab_sample_tree_.total_allocated_bytes);
    690   uint64_t fragment = slab_sample_tree_.total_allocated_bytes -
    691                       slab_sample_tree_.total_requested_bytes;
    692   double percentage = 0.0;
    693   if (slab_sample_tree_.total_allocated_bytes != 0) {
    694     percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes;
    695   }
    696   fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage);
    697   fprintf(fp, "Total allocations: %" PRIu64 "\n",
    698           slab_sample_tree_.nr_allocations);
    699   fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees);
    700   percentage = 0.0;
    701   if (slab_sample_tree_.nr_allocations != 0) {
    702     percentage = 100.0 * slab_sample_tree_.nr_cross_cpu_allocations /
    703                  slab_sample_tree_.nr_allocations;
    704   }
    705   fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
    706           slab_sample_tree_.nr_cross_cpu_allocations, percentage);
    707   fprintf(fp, "\n");
    708 }
    709 
    710 }  // namespace
    711 
    712 void RegisterKmemCommand() {
    713   RegisterCommand("kmem",
    714                   [] { return std::unique_ptr<Command>(new KmemCommand()); });
    715 }
    716