Home | History | Annotate | Download | only in simpleperf
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef SIMPLE_PERF_SAMPLE_TREE_H_
     18 #define SIMPLE_PERF_SAMPLE_TREE_H_
     19 
     20 #include <unordered_map>
     21 
     22 #include "callchain.h"
     23 #include "OfflineUnwinder.h"
     24 #include "perf_regs.h"
     25 #include "record.h"
     26 #include "SampleComparator.h"
     27 #include "SampleDisplayer.h"
     28 #include "thread_tree.h"
     29 
     30 using namespace simpleperf;
     31 
     32 // A SampleTree is a collection of samples. A profiling report is mainly about
     33 // constructing a SampleTree and display it. There are three steps involved:
     34 // build the tree, sort the tree, and display it. For example, if we want to
     35 // show how many cpu-cycles are spent in different functions, we should do as
     36 // follows:
     37 // 1. Build a SampleTree from SampleRecords with each sample containing
     38 //    (cpu-cycles, function name). When building the tree, we should merge
     39 //    samples containing the same function name.
     40 // 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
     41 //    samples in a decreasing order of cpu-cycles, we should sort it like this.
     42 // 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
     43 //    pair.
     44 //
     45 // We represent the three steps with three template classes.
     46 // 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
     47 //    SampleTreeBuilder's constructor decides the property of samples should be
     48 //    merged together.
     49 // 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
     50 //    sorted by SampleTreeSorter. The sort result decides the order to show
     51 //    samples.
     52 // 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
     53 //    displays each sample in the SampleTree.
     54 
     55 template <typename EntryT, typename AccumulateInfoT>
     56 class SampleTreeBuilder {
     57  public:
     58   explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator)
     59       : sample_set_(comparator),
     60         accumulate_callchain_(false),
     61         sample_comparator_(comparator),
     62         callchain_sample_set_(comparator),
     63         use_branch_address_(false),
     64         build_callchain_(false),
     65         use_caller_as_callchain_root_(false) {}
     66 
     67   virtual ~SampleTreeBuilder() {}
     68 
     69   void SetBranchSampleOption(bool use_branch_address) {
     70     use_branch_address_ = use_branch_address;
     71   }
     72 
     73   void SetCallChainSampleOptions(bool accumulate_callchain,
     74                                  bool build_callchain,
     75                                  bool use_caller_as_callchain_root) {
     76     accumulate_callchain_ = accumulate_callchain;
     77     build_callchain_ = build_callchain;
     78     use_caller_as_callchain_root_ = use_caller_as_callchain_root;
     79     if (accumulate_callchain_) {
     80       offline_unwinder_.reset(new OfflineUnwinder(false));
     81     }
     82   }
     83 
     84   void ProcessSampleRecord(const SampleRecord& r) {
     85     if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
     86       for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
     87         auto& item = r.branch_stack_data.stack[i];
     88         if (item.from != 0 && item.to != 0) {
     89           CreateBranchSample(r, item);
     90         }
     91       }
     92       return;
     93     }
     94     bool in_kernel = r.InKernel();
     95     AccumulateInfoT acc_info;
     96     EntryT* sample = CreateSample(r, in_kernel, &acc_info);
     97     if (sample == nullptr) {
     98       return;
     99     }
    100     if (accumulate_callchain_) {
    101       std::vector<uint64_t> ips;
    102       if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
    103         ips.insert(ips.end(), r.callchain_data.ips,
    104                    r.callchain_data.ips + r.callchain_data.ip_nr);
    105       }
    106       const ThreadEntry* thread = GetThreadOfSample(sample);
    107       // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
    108       // make up for the missing kernel patch in N9. See b/22612370.
    109       if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
    110           (r.regs_user_data.reg_mask != 0) &&
    111           (r.sample_type & PERF_SAMPLE_STACK_USER) &&
    112           (r.GetValidStackSize() > 0)) {
    113         RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    114         std::vector<uint64_t> user_ips;
    115         std::vector<uint64_t> sps;
    116         if (offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
    117                                                r.GetValidStackSize(), &user_ips, &sps)) {
    118           ips.push_back(PERF_CONTEXT_USER);
    119           ips.insert(ips.end(), user_ips.begin(), user_ips.end());
    120         }
    121       }
    122 
    123       std::vector<EntryT*> callchain;
    124       callchain.push_back(sample);
    125 
    126       bool first_ip = true;
    127       for (auto& ip : ips) {
    128         if (ip >= PERF_CONTEXT_MAX) {
    129           switch (ip) {
    130             case PERF_CONTEXT_KERNEL:
    131               in_kernel = true;
    132               break;
    133             case PERF_CONTEXT_USER:
    134               in_kernel = false;
    135               break;
    136             default:
    137               LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
    138           }
    139         } else {
    140           if (first_ip) {
    141             first_ip = false;
    142             // Remove duplication with sampled ip.
    143             if (ip == r.ip_data.ip) {
    144               continue;
    145             }
    146           }
    147           EntryT* callchain_sample =
    148               CreateCallChainSample(sample, ip, in_kernel, callchain, acc_info);
    149           if (callchain_sample == nullptr) {
    150             break;
    151           }
    152           callchain.push_back(callchain_sample);
    153         }
    154       }
    155 
    156       if (build_callchain_) {
    157         std::set<EntryT*> added_set;
    158         if (use_caller_as_callchain_root_) {
    159           std::reverse(callchain.begin(), callchain.end());
    160         }
    161         EntryT* parent = nullptr;
    162         while (callchain.size() >= 2) {
    163           EntryT* sample = callchain[0];
    164           callchain.erase(callchain.begin());
    165           // Add only once for recursive calls on callchain.
    166           if (added_set.find(sample) != added_set.end()) {
    167             continue;
    168           }
    169           added_set.insert(sample);
    170           InsertCallChainForSample(sample, callchain, acc_info);
    171           UpdateCallChainParentInfo(sample, parent);
    172           parent = sample;
    173         }
    174       }
    175     }
    176   }
    177 
    178   std::vector<EntryT*> GetSamples() const {
    179     std::vector<EntryT*> result;
    180     for (auto& entry : sample_set_) {
    181       result.push_back(entry);
    182     }
    183     return result;
    184   }
    185 
    186  protected:
    187   virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
    188                                AccumulateInfoT* acc_info) = 0;
    189   virtual EntryT* CreateBranchSample(const SampleRecord& r,
    190                                      const BranchStackItemType& item) = 0;
    191   virtual EntryT* CreateCallChainSample(const EntryT* sample, uint64_t ip,
    192                                         bool in_kernel,
    193                                         const std::vector<EntryT*>& callchain,
    194                                         const AccumulateInfoT& acc_info) = 0;
    195   virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
    196   virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
    197   virtual bool FilterSample(const EntryT*) { return true; }
    198 
    199   virtual void UpdateSummary(const EntryT*) {}
    200 
    201   virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
    202 
    203   EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
    204     if (sample == nullptr || !FilterSample(sample.get())) {
    205       return nullptr;
    206     }
    207     UpdateSummary(sample.get());
    208     EntryT* result;
    209     auto it = sample_set_.find(sample.get());
    210     if (it == sample_set_.end()) {
    211       result = sample.get();
    212       sample_set_.insert(sample.get());
    213       sample_storage_.push_back(std::move(sample));
    214     } else {
    215       result = *it;
    216       MergeSample(*it, sample.get());
    217     }
    218     return result;
    219   }
    220 
    221   EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
    222                                 const std::vector<EntryT*>& callchain) {
    223     if (sample == nullptr) {
    224       return nullptr;
    225     }
    226     if (!FilterSample(sample.get())) {
    227       // Store in callchain_sample_set_ for use in other EntryT's callchain.
    228       auto it = callchain_sample_set_.find(sample.get());
    229       if (it != callchain_sample_set_.end()) {
    230         return *it;
    231       }
    232       EntryT* result = sample.get();
    233       callchain_sample_set_.insert(sample.get());
    234       sample_storage_.push_back(std::move(sample));
    235       return result;
    236     }
    237 
    238     auto it = sample_set_.find(sample.get());
    239     if (it != sample_set_.end()) {
    240       EntryT* sample = *it;
    241       // Process only once for recursive function call.
    242       if (std::find(callchain.begin(), callchain.end(), sample) !=
    243           callchain.end()) {
    244         return sample;
    245       }
    246     }
    247     return InsertSample(std::move(sample));
    248   }
    249 
    250   void InsertCallChainForSample(EntryT* sample,
    251                                 const std::vector<EntryT*>& callchain,
    252                                 const AccumulateInfoT& acc_info) {
    253     uint64_t period = GetPeriodForCallChain(acc_info);
    254     sample->callchain.AddCallChain(
    255         callchain, period, [&](const EntryT* s1, const EntryT* s2) {
    256           return sample_comparator_.IsSameSample(s1, s2);
    257         });
    258   }
    259 
    260   void AddCallChainDuplicateInfo() {
    261     if (build_callchain_) {
    262       for (EntryT* sample : sample_set_) {
    263         auto it = callchain_parent_map_.find(sample);
    264         if (it != callchain_parent_map_.end() && !it->second.has_multiple_parents) {
    265           sample->callchain.duplicated = true;
    266         }
    267       }
    268     }
    269   }
    270 
    271   std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
    272   bool accumulate_callchain_;
    273 
    274  private:
    275   void UpdateCallChainParentInfo(EntryT* sample, EntryT* parent) {
    276     if (parent == nullptr) {
    277       return;
    278     }
    279     auto it = callchain_parent_map_.find(sample);
    280     if (it == callchain_parent_map_.end()) {
    281       CallChainParentInfo info;
    282       info.parent = parent;
    283       info.has_multiple_parents = false;
    284       callchain_parent_map_[sample] = info;
    285     } else if (it->second.parent != parent) {
    286       it->second.has_multiple_parents = true;
    287     }
    288   }
    289 
    290   const SampleComparator<EntryT> sample_comparator_;
    291   // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
    292   // and only used in other EntryT's callchain.
    293   std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
    294   std::vector<std::unique_ptr<EntryT>> sample_storage_;
    295 
    296   struct CallChainParentInfo {
    297     EntryT* parent;
    298     bool has_multiple_parents;
    299   };
    300   std::unordered_map<EntryT*, CallChainParentInfo> callchain_parent_map_;
    301 
    302   bool use_branch_address_;
    303   bool build_callchain_;
    304   bool use_caller_as_callchain_root_;
    305   std::unique_ptr<OfflineUnwinder> offline_unwinder_;
    306 };
    307 
    308 template <typename EntryT>
    309 class SampleTreeSorter {
    310  public:
    311   explicit SampleTreeSorter(SampleComparator<EntryT> comparator)
    312       : comparator_(comparator) {}
    313 
    314   virtual ~SampleTreeSorter() {}
    315 
    316   void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
    317     if (sort_callchain) {
    318       for (auto& sample : v) {
    319         SortCallChain(sample);
    320       }
    321     }
    322     if (!comparator_.empty()) {
    323       std::sort(v.begin(), v.end(), [this](const EntryT* s1, const EntryT* s2) {
    324         return comparator_(s1, s2);
    325       });
    326     }
    327   }
    328 
    329  protected:
    330   void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
    331 
    332  private:
    333   SampleComparator<EntryT> comparator_;
    334 };
    335 
    336 template <typename EntryT, typename InfoT>
    337 class SampleTreeDisplayer {
    338  public:
    339   explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer)
    340       : displayer_(displayer) {}
    341 
    342   virtual ~SampleTreeDisplayer() {}
    343 
    344   void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples,
    345                       const InfoT* info) {
    346     displayer_.SetInfo(info);
    347     for (const auto& sample : samples) {
    348       displayer_.AdjustWidth(sample);
    349     }
    350     displayer_.PrintNames(fp);
    351     for (const auto& sample : samples) {
    352       displayer_.PrintSample(fp, sample);
    353     }
    354   }
    355 
    356  private:
    357   SampleDisplayer<EntryT, InfoT> displayer_;
    358 };
    359 
    360 #endif  // SIMPLE_PERF_SAMPLE_TREE_H_
    361