Home | History | Annotate | Download | only in ps
      1 /*
      2  * Copyright (C) 2018 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "src/traced/probes/ps/process_stats_data_source.h"
     18 
     19 #include <stdlib.h>
     20 
     21 #include <algorithm>
     22 #include <utility>
     23 
     24 #include "perfetto/base/file_utils.h"
     25 #include "perfetto/base/metatrace.h"
     26 #include "perfetto/base/scoped_file.h"
     27 #include "perfetto/base/string_splitter.h"
     28 #include "perfetto/base/task_runner.h"
     29 #include "perfetto/base/time.h"
     30 
     31 #include "perfetto/trace/ps/process_stats.pbzero.h"
     32 #include "perfetto/trace/ps/process_tree.pbzero.h"
     33 #include "perfetto/trace/trace_packet.pbzero.h"
     34 
     35 // TODO(primiano): the code in this file assumes that PIDs are never recycled
     36 // and that processes/threads never change names. Neither is always true.
     37 
     38 // The notion of PID in the Linux kernel is a bit confusing.
     39 // - PID: is really the thread id (for the main thread: PID == TID).
     40 // - TGID (thread group ID): is the Unix Process ID (the actual PID).
     41 // - PID == TGID for the main thread: the TID of the main thread is also the PID
     42 //   of the process.
     43 // So, in this file, |pid| might refer to either a process id or a thread id.
     44 
     45 namespace perfetto {
     46 
     47 namespace {
     48 
     49 inline int32_t ParseIntValue(const char* str) {
     50   int32_t ret = 0;
     51   for (;;) {
     52     char c = *(str++);
     53     if (!c)
     54       break;
     55     if (c < '0' || c > '9')
     56       return 0;
     57     ret *= 10;
     58     ret += static_cast<int32_t>(c - '0');
     59   }
     60   return ret;
     61 }
     62 
     63 int32_t ReadNextNumericDir(DIR* dirp) {
     64   while (struct dirent* dir_ent = readdir(dirp)) {
     65     if (dir_ent->d_type != DT_DIR)
     66       continue;
     67     int32_t int_value = ParseIntValue(dir_ent->d_name);
     68     if (int_value)
     69       return int_value;
     70   }
     71   return 0;
     72 }
     73 
     74 inline int ToInt(const std::string& str) {
     75   return atoi(str.c_str());
     76 }
     77 
     78 inline uint32_t ToU32(const char* str) {
     79   return static_cast<uint32_t>(strtol(str, nullptr, 10));
     80 }
     81 
     82 }  // namespace
     83 
// static
// Out-of-line definition of the static constexpr member |kTypeId|. No
// initializer is given here: the value comes from the declaration of the
// member inside the class.
constexpr int ProcessStatsDataSource::kTypeId;
     86 
     87 ProcessStatsDataSource::ProcessStatsDataSource(
     88     base::TaskRunner* task_runner,
     89     TracingSessionID session_id,
     90     std::unique_ptr<TraceWriter> writer,
     91     const DataSourceConfig& config)
     92     : ProbesDataSource(session_id, kTypeId),
     93       task_runner_(task_runner),
     94       writer_(std::move(writer)),
     95       record_thread_names_(config.process_stats_config().record_thread_names()),
     96       dump_all_procs_on_start_(
     97           config.process_stats_config().scan_all_processes_on_start()),
     98       weak_factory_(this) {
     99   const auto& ps_config = config.process_stats_config();
    100   const auto& quirks = ps_config.quirks();
    101   enable_on_demand_dumps_ =
    102       (std::find(quirks.begin(), quirks.end(),
    103                  ProcessStatsConfig::DISABLE_ON_DEMAND) == quirks.end());
    104   poll_period_ms_ = ps_config.proc_stats_poll_ms();
    105   if (poll_period_ms_ > 0 && poll_period_ms_ < 100) {
    106     PERFETTO_ILOG("proc_stats_poll_ms %" PRIu32
    107                   " is less than minimum of 100ms. Increasing to 100ms.",
    108                   poll_period_ms_);
    109     poll_period_ms_ = 100;
    110   }
    111 
    112   if (poll_period_ms_ > 0) {
    113     auto proc_stats_ttl_ms = ps_config.proc_stats_cache_ttl_ms();
    114     process_stats_cache_ttl_ticks_ =
    115         std::max(proc_stats_ttl_ms / poll_period_ms_, 1u);
    116   }
    117 }
    118 
// The compiler-generated destructor is used; it is defined here, out of line,
// in the .cc file.
ProcessStatsDataSource::~ProcessStatsDataSource() = default;
    120 
    121 void ProcessStatsDataSource::Start() {
    122   if (dump_all_procs_on_start_)
    123     WriteAllProcesses();
    124 
    125   if (poll_period_ms_) {
    126     auto weak_this = GetWeakPtr();
    127     task_runner_->PostTask(std::bind(&ProcessStatsDataSource::Tick, weak_this));
    128   }
    129 }
    130 
// Returns a weak pointer to this object, obtained from |weak_factory_|.
base::WeakPtr<ProcessStatsDataSource> ProcessStatsDataSource::GetWeakPtr()
    const {
  return weak_factory_.GetWeakPtr();
}
    135 
    136 void ProcessStatsDataSource::WriteAllProcesses() {
    137   PERFETTO_METATRACE("WriteAllProcesses", 0);
    138   PERFETTO_DCHECK(!cur_ps_tree_);
    139 
    140   CacheProcFsScanStartTimestamp();
    141 
    142   base::ScopedDir proc_dir = OpenProcDir();
    143   if (!proc_dir)
    144     return;
    145   while (int32_t pid = ReadNextNumericDir(*proc_dir)) {
    146     WriteProcessOrThread(pid);
    147     char task_path[255];
    148     sprintf(task_path, "/proc/%d/task", pid);
    149     base::ScopedDir task_dir(opendir(task_path));
    150     if (!task_dir)
    151       continue;
    152 
    153     while (int32_t tid = ReadNextNumericDir(*task_dir)) {
    154       if (tid == pid)
    155         continue;
    156       if (record_thread_names_) {
    157         WriteProcessOrThread(tid);
    158       } else {
    159         // If we are not interested in thread names, there is no need to open
    160         // a proc file for each thread. We can save time and directly write the
    161         // thread record.
    162         WriteThread(tid, pid, /*optional_name=*/nullptr);
    163       }
    164     }
    165   }
    166   FinalizeCurPacket();
    167 }
    168 
    169 void ProcessStatsDataSource::OnPids(const std::vector<int32_t>& pids) {
    170   PERFETTO_METATRACE("OnPids", 0);
    171   if (!enable_on_demand_dumps_)
    172     return;
    173   PERFETTO_DCHECK(!cur_ps_tree_);
    174   for (int32_t pid : pids) {
    175     if (seen_pids_.count(pid) || pid == 0)
    176       continue;
    177     WriteProcessOrThread(pid);
    178   }
    179   FinalizeCurPacket();
    180 }
    181 
    182 void ProcessStatsDataSource::OnRenamePids(const std::vector<int32_t>& pids) {
    183   PERFETTO_METATRACE("OnRenamePids", 0);
    184   if (!enable_on_demand_dumps_)
    185     return;
    186   PERFETTO_DCHECK(!cur_ps_tree_);
    187   for (int32_t pid : pids) {
    188     auto pid_it = seen_pids_.find(pid);
    189     if (pid_it == seen_pids_.end())
    190       continue;
    191     seen_pids_.erase(pid_it);
    192   }
    193 }
    194 
// Forwards the flush to |writer_|, handing it |callback|. The flush request id
// is not used.
void ProcessStatsDataSource::Flush(FlushRequestID,
                                   std::function<void()> callback) {
  // We shouldn't get this in the middle of WriteAllProcesses() or OnPids().
  PERFETTO_DCHECK(!cur_ps_tree_);
  PERFETTO_DCHECK(!cur_ps_stats_);
  PERFETTO_DCHECK(!cur_ps_stats_process_);
  writer_->Flush(callback);
}
    203 
    204 void ProcessStatsDataSource::WriteProcessOrThread(int32_t pid) {
    205   // In case we're called from outside WriteAllProcesses()
    206   CacheProcFsScanStartTimestamp();
    207 
    208   std::string proc_status = ReadProcPidFile(pid, "status");
    209   if (proc_status.empty())
    210     return;
    211   int tgid = ToInt(ReadProcStatusEntry(proc_status, "Tgid:"));
    212   if (tgid <= 0)
    213     return;
    214   if (!seen_pids_.count(tgid))
    215     WriteProcess(tgid, proc_status);
    216   if (pid != tgid) {
    217     PERFETTO_DCHECK(!seen_pids_.count(pid));
    218     std::string thread_name;
    219     if (record_thread_names_)
    220       thread_name = ReadProcStatusEntry(proc_status, "Name:");
    221     WriteThread(pid, tgid, thread_name.empty() ? nullptr : thread_name.c_str());
    222   }
    223 }
    224 
    225 void ProcessStatsDataSource::WriteProcess(int32_t pid,
    226                                           const std::string& proc_status) {
    227   PERFETTO_DCHECK(ToInt(ReadProcStatusEntry(proc_status, "Tgid:")) == pid);
    228   auto* proc = GetOrCreatePsTree()->add_processes();
    229   proc->set_pid(pid);
    230   proc->set_ppid(ToInt(ReadProcStatusEntry(proc_status, "PPid:")));
    231 
    232   std::string cmdline = ReadProcPidFile(pid, "cmdline");
    233   if (!cmdline.empty()) {
    234     using base::StringSplitter;
    235     for (StringSplitter ss(&cmdline[0], cmdline.size(), '\0'); ss.Next();)
    236       proc->add_cmdline(ss.cur_token());
    237   } else {
    238     // Nothing in cmdline so use the thread name instead (which is == "comm").
    239     proc->add_cmdline(ReadProcStatusEntry(proc_status, "Name:").c_str());
    240   }
    241   seen_pids_.emplace(pid);
    242 }
    243 
    244 void ProcessStatsDataSource::WriteThread(int32_t tid,
    245                                          int32_t tgid,
    246                                          const char* optional_name) {
    247   auto* thread = GetOrCreatePsTree()->add_threads();
    248   thread->set_tid(tid);
    249   thread->set_tgid(tgid);
    250   if (optional_name)
    251     thread->set_name(optional_name);
    252   seen_pids_.emplace(tid);
    253 }
    254 
    255 base::ScopedDir ProcessStatsDataSource::OpenProcDir() {
    256   base::ScopedDir proc_dir(opendir("/proc"));
    257   if (!proc_dir)
    258     PERFETTO_PLOG("Failed to opendir(/proc)");
    259   return proc_dir;
    260 }
    261 
    262 std::string ProcessStatsDataSource::ReadProcPidFile(int32_t pid,
    263                                                     const std::string& file) {
    264   std::string contents;
    265   contents.reserve(4096);
    266   if (!base::ReadFile("/proc/" + std::to_string(pid) + "/" + file, &contents))
    267     return "";
    268   return contents;
    269 }
    270 
    271 std::string ProcessStatsDataSource::ReadProcStatusEntry(const std::string& buf,
    272                                                         const char* key) {
    273   auto begin = buf.find(key);
    274   if (begin == std::string::npos)
    275     return "";
    276   begin = buf.find_first_not_of(" \t", begin + strlen(key));
    277   if (begin == std::string::npos)
    278     return "";
    279   auto end = buf.find('\n', begin);
    280   if (end == std::string::npos || end <= begin)
    281     return "";
    282   return buf.substr(begin, end - begin);
    283 }
    284 
    285 void ProcessStatsDataSource::StartNewPacketIfNeeded() {
    286   if (cur_packet_)
    287     return;
    288   cur_packet_ = writer_->NewTracePacket();
    289   cur_packet_->set_timestamp(CacheProcFsScanStartTimestamp());
    290 
    291   if (did_clear_incremental_state_) {
    292     cur_packet_->set_incremental_state_cleared(true);
    293     did_clear_incremental_state_ = false;
    294   }
    295 }
    296 
    297 protos::pbzero::ProcessTree* ProcessStatsDataSource::GetOrCreatePsTree() {
    298   StartNewPacketIfNeeded();
    299   if (!cur_ps_tree_)
    300     cur_ps_tree_ = cur_packet_->set_process_tree();
    301   cur_ps_stats_ = nullptr;
    302   cur_ps_stats_process_ = nullptr;
    303   return cur_ps_tree_;
    304 }
    305 
    306 protos::pbzero::ProcessStats* ProcessStatsDataSource::GetOrCreateStats() {
    307   StartNewPacketIfNeeded();
    308   if (!cur_ps_stats_)
    309     cur_ps_stats_ = cur_packet_->set_process_stats();
    310   cur_ps_tree_ = nullptr;
    311   cur_ps_stats_process_ = nullptr;
    312   return cur_ps_stats_;
    313 }
    314 
    315 protos::pbzero::ProcessStats_Process*
    316 ProcessStatsDataSource::GetOrCreateStatsProcess(int32_t pid) {
    317   if (cur_ps_stats_process_)
    318     return cur_ps_stats_process_;
    319   cur_ps_stats_process_ = GetOrCreateStats()->add_processes();
    320   cur_ps_stats_process_->set_pid(pid);
    321   return cur_ps_stats_process_;
    322 }
    323 
    324 void ProcessStatsDataSource::FinalizeCurPacket() {
    325   PERFETTO_DCHECK(!cur_ps_tree_ || cur_packet_);
    326   PERFETTO_DCHECK(!cur_ps_stats_ || cur_packet_);
    327   uint64_t now = static_cast<uint64_t>(base::GetBootTimeNs().count());
    328   if (cur_ps_tree_) {
    329     cur_ps_tree_->set_collection_end_timestamp(now);
    330     cur_ps_tree_ = nullptr;
    331   }
    332   if (cur_ps_stats_) {
    333     cur_ps_stats_->set_collection_end_timestamp(now);
    334     cur_ps_stats_ = nullptr;
    335   }
    336   cur_ps_stats_process_ = nullptr;
    337   cur_procfs_scan_start_timestamp_ = 0;
    338   cur_packet_ = TraceWriter::TracePacketHandle{};
    339 }
    340 
    341 // static
    342 void ProcessStatsDataSource::Tick(
    343     base::WeakPtr<ProcessStatsDataSource> weak_this) {
    344   if (!weak_this)
    345     return;
    346   ProcessStatsDataSource& thiz = *weak_this;
    347   uint32_t period_ms = thiz.poll_period_ms_;
    348   uint32_t delay_ms = period_ms - (base::GetWallTimeMs().count() % period_ms);
    349   thiz.task_runner_->PostDelayedTask(
    350       std::bind(&ProcessStatsDataSource::Tick, weak_this), delay_ms);
    351   thiz.WriteAllProcessStats();
    352 
    353   // We clear the cache every process_stats_cache_ttl_ticks_ ticks.
    354   if (++thiz.cache_ticks_ == thiz.process_stats_cache_ttl_ticks_) {
    355     thiz.cache_ticks_ = 0;
    356     thiz.process_stats_cache_.clear();
    357   }
    358 }
    359 
    360 void ProcessStatsDataSource::WriteAllProcessStats() {
    361   // TODO(primiano): implement whitelisting of processes by names.
    362   // TODO(primiano): Have a pid cache to avoid wasting cycles reading kthreads
    363   // proc files over and over. Same for non-whitelist processes (see above).
    364 
    365   CacheProcFsScanStartTimestamp();
    366   PERFETTO_METATRACE("WriteAllProcessStats", 0);
    367   base::ScopedDir proc_dir = OpenProcDir();
    368   if (!proc_dir)
    369     return;
    370   std::vector<int32_t> pids;
    371   while (int32_t pid = ReadNextNumericDir(*proc_dir)) {
    372     cur_ps_stats_process_ = nullptr;
    373 
    374     uint32_t pid_u = static_cast<uint32_t>(pid);
    375     if (skip_stats_for_pids_.size() > pid_u && skip_stats_for_pids_[pid_u])
    376       continue;
    377 
    378     std::string proc_status = ReadProcPidFile(pid, "status");
    379     if (proc_status.empty())
    380       continue;
    381 
    382     if (!WriteMemCounters(pid, proc_status)) {
    383       // If WriteMemCounters() fails the pid is very likely a kernel thread
    384       // that has a valid /proc/[pid]/status but no memory values. In this
    385       // case avoid keep polling it over and over.
    386       if (skip_stats_for_pids_.size() <= pid_u)
    387         skip_stats_for_pids_.resize(pid_u + 1);
    388       skip_stats_for_pids_[pid_u] = true;
    389       continue;
    390     }
    391 
    392     std::string oom_score_adj = ReadProcPidFile(pid, "oom_score_adj");
    393     if (!oom_score_adj.empty()) {
    394       CachedProcessStats& cached = process_stats_cache_[pid];
    395       auto counter = ToInt(oom_score_adj);
    396       if (counter != cached.oom_score_adj) {
    397         GetOrCreateStatsProcess(pid)->set_oom_score_adj(counter);
    398         cached.oom_score_adj = counter;
    399       }
    400     }
    401 
    402     pids.push_back(pid);
    403   }
    404   FinalizeCurPacket();
    405 
    406   // Ensure that we write once long-term process info (e.g., name) for new pids
    407   // that we haven't seen before.
    408   OnPids(pids);
    409 }
    410 
    411 // Returns true if the stats for the given |pid| have been written, false it
    412 // it failed (e.g., |pid| was a kernel thread and, as such, didn't report any
    413 // memory counters).
    414 bool ProcessStatsDataSource::WriteMemCounters(int32_t pid,
    415                                               const std::string& proc_status) {
    416   bool proc_status_has_mem_counters = false;
    417   CachedProcessStats& cached = process_stats_cache_[pid];
    418 
    419   // Parse /proc/[pid]/status, which looks like this:
    420   // Name:   cat
    421   // Umask:  0027
    422   // State:  R (running)
    423   // FDSize: 256
    424   // Groups: 4 20 24 46 997
    425   // VmPeak:     5992 kB
    426   // VmSize:     5992 kB
    427   // VmLck:         0 kB
    428   // ...
    429   std::vector<char> key;
    430   std::vector<char> value;
    431   enum { kKey, kSeparator, kValue } state = kKey;
    432   for (char c : proc_status) {
    433     if (c == '\n') {
    434       key.push_back('\0');
    435       value.push_back('\0');
    436 
    437       // |value| will contain "1234 KB". We rely on strtol() (in ToU32()) to
    438       // stop parsing at the first non-numeric character.
    439       if (strcmp(key.data(), "VmSize") == 0) {
    440         // Assume that if we see VmSize we'll see also the others.
    441         proc_status_has_mem_counters = true;
    442 
    443         auto counter = ToU32(value.data());
    444         if (counter != cached.vm_size_kb) {
    445           GetOrCreateStatsProcess(pid)->set_vm_size_kb(counter);
    446           cached.vm_size_kb = counter;
    447         }
    448       } else if (strcmp(key.data(), "VmLck") == 0) {
    449         auto counter = ToU32(value.data());
    450         if (counter != cached.vm_locked_kb) {
    451           GetOrCreateStatsProcess(pid)->set_vm_locked_kb(counter);
    452           cached.vm_locked_kb = counter;
    453         }
    454       } else if (strcmp(key.data(), "VmHWM") == 0) {
    455         auto counter = ToU32(value.data());
    456         if (counter != cached.vm_hvm_kb) {
    457           GetOrCreateStatsProcess(pid)->set_vm_hwm_kb(counter);
    458           cached.vm_hvm_kb = counter;
    459         }
    460       } else if (strcmp(key.data(), "VmRSS") == 0) {
    461         auto counter = ToU32(value.data());
    462         if (counter != cached.vm_rss_kb) {
    463           GetOrCreateStatsProcess(pid)->set_vm_rss_kb(counter);
    464           cached.vm_rss_kb = counter;
    465         }
    466       } else if (strcmp(key.data(), "RssAnon") == 0) {
    467         auto counter = ToU32(value.data());
    468         if (counter != cached.rss_anon_kb) {
    469           GetOrCreateStatsProcess(pid)->set_rss_anon_kb(counter);
    470           cached.rss_anon_kb = counter;
    471         }
    472       } else if (strcmp(key.data(), "RssFile") == 0) {
    473         auto counter = ToU32(value.data());
    474         if (counter != cached.rss_file_kb) {
    475           GetOrCreateStatsProcess(pid)->set_rss_file_kb(counter);
    476           cached.rss_file_kb = counter;
    477         }
    478       } else if (strcmp(key.data(), "RssShmem") == 0) {
    479         auto counter = ToU32(value.data());
    480         if (counter != cached.rss_shmem_kb) {
    481           GetOrCreateStatsProcess(pid)->set_rss_shmem_kb(counter);
    482           cached.rss_shmem_kb = counter;
    483         }
    484       } else if (strcmp(key.data(), "VmSwap") == 0) {
    485         auto counter = ToU32(value.data());
    486         if (counter != cached.vm_swap_kb) {
    487           GetOrCreateStatsProcess(pid)->set_vm_swap_kb(counter);
    488           cached.vm_swap_kb = counter;
    489         }
    490       }
    491 
    492       key.clear();
    493       state = kKey;
    494       continue;
    495     }
    496 
    497     if (state == kKey) {
    498       if (c == ':') {
    499         state = kSeparator;
    500         continue;
    501       }
    502       key.push_back(c);
    503       continue;
    504     }
    505 
    506     if (state == kSeparator) {
    507       if (isspace(c))
    508         continue;
    509       value.clear();
    510       value.push_back(c);
    511       state = kValue;
    512       continue;
    513     }
    514 
    515     if (state == kValue) {
    516       value.push_back(c);
    517     }
    518   }
    519   return proc_status_has_mem_counters;
    520 }
    521 
    522 uint64_t ProcessStatsDataSource::CacheProcFsScanStartTimestamp() {
    523   if (!cur_procfs_scan_start_timestamp_)
    524     cur_procfs_scan_start_timestamp_ =
    525         static_cast<uint64_t>(base::GetBootTimeNs().count());
    526   return cur_procfs_scan_start_timestamp_;
    527 }
    528 
    529 void ProcessStatsDataSource::ClearIncrementalState() {
    530   PERFETTO_DLOG("ProcessStatsDataSource clearing incremental state.");
    531   seen_pids_.clear();
    532   skip_stats_for_pids_.clear();
    533 
    534   cache_ticks_ = 0;
    535   process_stats_cache_.clear();
    536 
    537   // Set the relevant flag in the next packet.
    538   did_clear_incremental_state_ = true;
    539 }
    540 
    541 }  // namespace perfetto
    542