Home | History | Annotate | Download | only in metricsd
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "metrics_collector.h"
     18 
     19 #include <sysexits.h>
     20 #include <time.h>
     21 
     22 #include <memory>
     23 
     24 #include <base/bind.h>
     25 #include <base/files/file_path.h>
     26 #include <base/files/file_util.h>
     27 #include <base/hash.h>
     28 #include <base/logging.h>
     29 #include <base/strings/string_number_conversions.h>
     30 #include <base/strings/string_split.h>
     31 #include <base/strings/string_util.h>
     32 #include <base/strings/stringprintf.h>
     33 #include <brillo/binder_watcher.h>
     34 #include <brillo/osrelease_reader.h>
     35 
     36 #include "constants.h"
     37 #include "metrics_collector_service_impl.h"
     38 
     39 using base::FilePath;
     40 using base::StringPrintf;
     41 using base::Time;
     42 using base::TimeDelta;
     43 using base::TimeTicks;
     44 using chromeos_metrics::PersistentInteger;
     45 using std::map;
     46 using std::string;
     47 using std::vector;
     48 
     49 namespace {
     50 
     51 const int kSecondsPerMinute = 60;
     52 const int kMinutesPerHour = 60;
     53 const int kHoursPerDay = 24;
     54 const int kMinutesPerDay = kHoursPerDay * kMinutesPerHour;
     55 const int kSecondsPerDay = kSecondsPerMinute * kMinutesPerDay;
     56 const int kDaysPerWeek = 7;
     57 const int kSecondsPerWeek = kSecondsPerDay * kDaysPerWeek;
     58 
     59 // Interval between calls to UpdateStats().
     60 const uint32_t kUpdateStatsIntervalMs = 300000;
     61 
     62 const char kKernelCrashDetectedFile[] =
     63     "/data/misc/crash_reporter/run/kernel-crash-detected";
     64 const char kUncleanShutdownDetectedFile[] =
     65     "/var/run/unclean-shutdown-detected";
     66 
     67 const int kMetricMeminfoInterval = 30;    // seconds
     68 
     69 const char kMeminfoFileName[] = "/proc/meminfo";
     70 const char kVmStatFileName[] = "/proc/vmstat";
     71 
     72 const char kWeaveComponent[] = "metrics";
     73 const char kWeaveTrait[] = "_metrics";
     74 
     75 }  // namespace
     76 
// Zram sysfs entries.
// ReportZram() appends each of these file names to the zram directory it is
// given and parses the file content as an unsigned integer.

const char MetricsCollector::kComprDataSizeName[] = "compr_data_size";
const char MetricsCollector::kOrigDataSizeName[] = "orig_data_size";
const char MetricsCollector::kZeroPagesName[] = "zero_pages";
     82 
// Memory use stats collection intervals, in seconds.  We collect some memory
// use statistics at these intervals after boot, and we stop collecting after
// the last one, with the assumption that in most cases the memory use won't
// change much after that.  Each entry is the delay since the previous
// collection; the trailing comments give the nominal cumulative mark.
static const int kMemuseIntervals[] = {
  1 * kSecondsPerMinute,    // 1 minute mark
  4 * kSecondsPerMinute,    // 5 minute mark
  25 * kSecondsPerMinute,   // 0.5 hour mark
  120 * kSecondsPerMinute,  // 2.5 hour mark
  600 * kSecondsPerMinute,  // 12.5 hour mark
};
     94 
     95 MetricsCollector::MetricsCollector()
     96     : memuse_final_time_(0),
     97       memuse_interval_index_(0) {}
     98 
     99 MetricsCollector::~MetricsCollector() {
    100 }
    101 
    102 // static
    103 double MetricsCollector::GetActiveTime() {
    104   struct timespec ts;
    105   int r = clock_gettime(CLOCK_MONOTONIC, &ts);
    106   if (r < 0) {
    107     PLOG(WARNING) << "clock_gettime(CLOCK_MONOTONIC) failed";
    108     return 0;
    109   } else {
    110     return ts.tv_sec + static_cast<double>(ts.tv_nsec) / (1000 * 1000 * 1000);
    111   }
    112 }
    113 
    114 int MetricsCollector::Run() {
    115   if (CheckSystemCrash(kKernelCrashDetectedFile)) {
    116     ProcessKernelCrash();
    117   }
    118 
    119   if (CheckSystemCrash(kUncleanShutdownDetectedFile)) {
    120     ProcessUncleanShutdown();
    121   }
    122 
    123   // On OS version change, clear version stats (which are reported daily).
    124   int32_t version = GetOsVersionHash();
    125   if (version_cycle_->Get() != version) {
    126     version_cycle_->Set(version);
    127     kernel_crashes_version_count_->Set(0);
    128     version_cumulative_active_use_->Set(0);
    129     version_cumulative_cpu_use_->Set(0);
    130   }
    131 
    132   // Start metricscollectorservice
    133   android::sp<BnMetricsCollectorServiceImpl> metrics_collector_service =
    134       new BnMetricsCollectorServiceImpl(this);
    135   android::status_t status = android::defaultServiceManager()->addService(
    136       metrics_collector_service->getInterfaceDescriptor(),
    137       metrics_collector_service);
    138   CHECK(status == android::OK)
    139       << "failed to register service metricscollectorservice";
    140 
    141   // Watch Binder events in the main loop
    142   brillo::BinderWatcher binder_watcher;
    143   CHECK(binder_watcher.Init()) << "Binder FD watcher init failed";
    144   return brillo::Daemon::Run();
    145 }
    146 
    147 uint32_t MetricsCollector::GetOsVersionHash() {
    148   brillo::OsReleaseReader reader;
    149   reader.Load();
    150   string version;
    151   if (!reader.GetString(metrics::kProductVersion, &version)) {
    152     LOG(ERROR) << "failed to read the product version.";
    153     version = metrics::kDefaultVersion;
    154   }
    155 
    156   uint32_t version_hash = base::Hash(version);
    157   if (testing_) {
    158     version_hash = 42;  // return any plausible value for the hash
    159   }
    160   return version_hash;
    161 }
    162 
    163 void MetricsCollector::Init(bool testing, MetricsLibraryInterface* metrics_lib,
    164                             const string& diskstats_path,
    165                             const base::FilePath& private_metrics_directory,
    166                             const base::FilePath& shared_metrics_directory) {
    167   CHECK(metrics_lib);
    168   testing_ = testing;
    169   shared_metrics_directory_ = shared_metrics_directory;
    170   metrics_lib_ = metrics_lib;
    171 
    172   daily_active_use_.reset(new PersistentInteger("Platform.UseTime.PerDay",
    173                                                 private_metrics_directory));
    174   version_cumulative_active_use_.reset(new PersistentInteger(
    175       "Platform.CumulativeUseTime", private_metrics_directory));
    176   version_cumulative_cpu_use_.reset(new PersistentInteger(
    177       "Platform.CumulativeCpuTime", private_metrics_directory));
    178 
    179   kernel_crash_interval_.reset(new PersistentInteger(
    180       "Platform.KernelCrashInterval", private_metrics_directory));
    181   unclean_shutdown_interval_.reset(new PersistentInteger(
    182       "Platform.UncleanShutdownInterval", private_metrics_directory));
    183   user_crash_interval_.reset(new PersistentInteger("Platform.UserCrashInterval",
    184                                                    private_metrics_directory));
    185 
    186   any_crashes_daily_count_.reset(new PersistentInteger(
    187       "Platform.AnyCrashes.PerDay", private_metrics_directory));
    188   any_crashes_weekly_count_.reset(new PersistentInteger(
    189       "Platform.AnyCrashes.PerWeek", private_metrics_directory));
    190   user_crashes_daily_count_.reset(new PersistentInteger(
    191       "Platform.UserCrashes.PerDay", private_metrics_directory));
    192   user_crashes_weekly_count_.reset(new PersistentInteger(
    193       "Platform.UserCrashes.PerWeek", private_metrics_directory));
    194   kernel_crashes_daily_count_.reset(new PersistentInteger(
    195       "Platform.KernelCrashes.PerDay", private_metrics_directory));
    196   kernel_crashes_weekly_count_.reset(new PersistentInteger(
    197       "Platform.KernelCrashes.PerWeek", private_metrics_directory));
    198   kernel_crashes_version_count_.reset(new PersistentInteger(
    199       "Platform.KernelCrashesSinceUpdate", private_metrics_directory));
    200   unclean_shutdowns_daily_count_.reset(new PersistentInteger(
    201       "Platform.UncleanShutdown.PerDay", private_metrics_directory));
    202   unclean_shutdowns_weekly_count_.reset(new PersistentInteger(
    203       "Platform.UncleanShutdowns.PerWeek", private_metrics_directory));
    204 
    205   daily_cycle_.reset(
    206       new PersistentInteger("daily.cycle", private_metrics_directory));
    207   weekly_cycle_.reset(
    208       new PersistentInteger("weekly.cycle", private_metrics_directory));
    209   version_cycle_.reset(
    210       new PersistentInteger("version.cycle", private_metrics_directory));
    211 
    212   disk_usage_collector_.reset(new DiskUsageCollector(metrics_lib_));
    213   averaged_stats_collector_.reset(
    214       new AveragedStatisticsCollector(metrics_lib_, diskstats_path,
    215                                       kVmStatFileName));
    216   cpu_usage_collector_.reset(new CpuUsageCollector(metrics_lib_));
    217 }
    218 
    219 int MetricsCollector::OnInit() {
    220   int return_code = brillo::Daemon::OnInit();
    221   if (return_code != EX_OK)
    222     return return_code;
    223 
    224   StatsReporterInit();
    225 
    226   // Start collecting meminfo stats.
    227   ScheduleMeminfoCallback(kMetricMeminfoInterval);
    228   memuse_final_time_ = GetActiveTime() + kMemuseIntervals[0];
    229   ScheduleMemuseCallback(kMemuseIntervals[0]);
    230 
    231   if (testing_)
    232     return EX_OK;
    233 
    234   weave_service_subscription_ = weaved::Service::Connect(
    235       brillo::MessageLoop::current(),
    236       base::Bind(&MetricsCollector::OnWeaveServiceConnected,
    237                  weak_ptr_factory_.GetWeakPtr()));
    238 
    239   latest_cpu_use_microseconds_ = cpu_usage_collector_->GetCumulativeCpuUse();
    240   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
    241       base::Bind(&MetricsCollector::HandleUpdateStatsTimeout,
    242                  weak_ptr_factory_.GetWeakPtr()),
    243       base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs));
    244 
    245   return EX_OK;
    246 }
    247 
    248 void MetricsCollector::OnWeaveServiceConnected(
    249     const std::weak_ptr<weaved::Service>& service) {
    250   service_ = service;
    251   auto weave_service = service_.lock();
    252   if (!weave_service)
    253     return;
    254 
    255   weave_service->AddComponent(kWeaveComponent, {kWeaveTrait}, nullptr);
    256   weave_service->AddCommandHandler(
    257       kWeaveComponent, kWeaveTrait, "enableAnalyticsReporting",
    258       base::Bind(&MetricsCollector::OnEnableMetrics,
    259                  weak_ptr_factory_.GetWeakPtr()));
    260   weave_service->AddCommandHandler(
    261       kWeaveComponent, kWeaveTrait, "disableAnalyticsReporting",
    262       base::Bind(&MetricsCollector::OnDisableMetrics,
    263                  weak_ptr_factory_.GetWeakPtr()));
    264 
    265   UpdateWeaveState();
    266 }
    267 
    268 void MetricsCollector::OnEnableMetrics(
    269     std::unique_ptr<weaved::Command> command) {
    270   if (base::WriteFile(
    271           shared_metrics_directory_.Append(metrics::kConsentFileName), "", 0) !=
    272       0) {
    273     PLOG(ERROR) << "Could not create the consent file.";
    274     command->Abort("metrics_error", "Could not create the consent file",
    275                    nullptr);
    276     return;
    277   }
    278 
    279   UpdateWeaveState();
    280   command->Complete({}, nullptr);
    281 }
    282 
    283 void MetricsCollector::OnDisableMetrics(
    284     std::unique_ptr<weaved::Command> command) {
    285   if (!base::DeleteFile(
    286           shared_metrics_directory_.Append(metrics::kConsentFileName), false)) {
    287     PLOG(ERROR) << "Could not delete the consent file.";
    288     command->Abort("metrics_error", "Could not delete the consent file",
    289                    nullptr);
    290     return;
    291   }
    292 
    293   UpdateWeaveState();
    294   command->Complete({}, nullptr);
    295 }
    296 
    297 void MetricsCollector::UpdateWeaveState() {
    298   auto weave_service = service_.lock();
    299   if (!weave_service)
    300     return;
    301 
    302   std::string enabled =
    303       metrics_lib_->AreMetricsEnabled() ? "enabled" : "disabled";
    304 
    305   if (!weave_service->SetStateProperty(kWeaveComponent, kWeaveTrait,
    306                                        "analyticsReportingState",
    307                                        *brillo::ToValue(enabled),
    308                                        nullptr)) {
    309     LOG(ERROR) << "failed to update weave's state";
    310   }
    311 }
    312 
    313 void MetricsCollector::ProcessUserCrash() {
    314   // Counts the active time up to now.
    315   UpdateStats(TimeTicks::Now(), Time::Now());
    316 
    317   // Reports the active use time since the last crash and resets it.
    318   SendAndResetCrashIntervalSample(user_crash_interval_);
    319 
    320   any_crashes_daily_count_->Add(1);
    321   any_crashes_weekly_count_->Add(1);
    322   user_crashes_daily_count_->Add(1);
    323   user_crashes_weekly_count_->Add(1);
    324 }
    325 
    326 void MetricsCollector::ProcessKernelCrash() {
    327   // Counts the active time up to now.
    328   UpdateStats(TimeTicks::Now(), Time::Now());
    329 
    330   // Reports the active use time since the last crash and resets it.
    331   SendAndResetCrashIntervalSample(kernel_crash_interval_);
    332 
    333   any_crashes_daily_count_->Add(1);
    334   any_crashes_weekly_count_->Add(1);
    335   kernel_crashes_daily_count_->Add(1);
    336   kernel_crashes_weekly_count_->Add(1);
    337 
    338   kernel_crashes_version_count_->Add(1);
    339 }
    340 
    341 void MetricsCollector::ProcessUncleanShutdown() {
    342   // Counts the active time up to now.
    343   UpdateStats(TimeTicks::Now(), Time::Now());
    344 
    345   // Reports the active use time since the last crash and resets it.
    346   SendAndResetCrashIntervalSample(unclean_shutdown_interval_);
    347 
    348   unclean_shutdowns_daily_count_->Add(1);
    349   unclean_shutdowns_weekly_count_->Add(1);
    350   any_crashes_daily_count_->Add(1);
    351   any_crashes_weekly_count_->Add(1);
    352 }
    353 
    354 bool MetricsCollector::CheckSystemCrash(const string& crash_file) {
    355   FilePath crash_detected(crash_file);
    356   if (!base::PathExists(crash_detected))
    357     return false;
    358 
    359   // Deletes the crash-detected file so that the daemon doesn't report
    360   // another kernel crash in case it's restarted.
    361   base::DeleteFile(crash_detected, false);  // not recursive
    362   return true;
    363 }
    364 
// Starts the periodic statistics collectors created by Init(): schedules the
// disk usage collector, initializes and schedules the CPU usage collector,
// and puts the averaged statistics collector into its waiting state.
void MetricsCollector::StatsReporterInit() {
  disk_usage_collector_->Schedule();

  // The CPU usage collector is initialized before it is scheduled.
  cpu_usage_collector_->Init();
  cpu_usage_collector_->Schedule();

  // Don't start a collection cycle during the first run to avoid delaying the
  // boot.
  averaged_stats_collector_->ScheduleWait();
}
    375 
    376 void MetricsCollector::ScheduleMeminfoCallback(int wait) {
    377   if (testing_) {
    378     return;
    379   }
    380   base::TimeDelta waitDelta = base::TimeDelta::FromSeconds(wait);
    381   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
    382       base::Bind(&MetricsCollector::MeminfoCallback,
    383                  weak_ptr_factory_.GetWeakPtr(), waitDelta),
    384       waitDelta);
    385 }
    386 
    387 void MetricsCollector::MeminfoCallback(base::TimeDelta wait) {
    388   string meminfo_raw;
    389   const FilePath meminfo_path(kMeminfoFileName);
    390   if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
    391     LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
    392     return;
    393   }
    394   // Make both calls even if the first one fails.
    395   if (ProcessMeminfo(meminfo_raw)) {
    396     base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
    397         base::Bind(&MetricsCollector::MeminfoCallback,
    398                    weak_ptr_factory_.GetWeakPtr(), wait),
    399         wait);
    400   }
    401 }
    402 
    403 // static
    404 bool MetricsCollector::ReadFileToUint64(const base::FilePath& path,
    405                                          uint64_t* value) {
    406   std::string content;
    407   if (!base::ReadFileToString(path, &content)) {
    408     PLOG(WARNING) << "cannot read " << path.MaybeAsASCII();
    409     return false;
    410   }
    411   // Remove final newline.
    412   base::TrimWhitespaceASCII(content, base::TRIM_TRAILING, &content);
    413   if (!base::StringToUint64(content, value)) {
    414     LOG(WARNING) << "invalid integer: " << content;
    415     return false;
    416   }
    417   return true;
    418 }
    419 
    420 bool MetricsCollector::ReportZram(const base::FilePath& zram_dir) {
    421   // Data sizes are in bytes.  |zero_pages| is in number of pages.
    422   uint64_t compr_data_size, orig_data_size, zero_pages;
    423   const size_t page_size = 4096;
    424 
    425   if (!ReadFileToUint64(zram_dir.Append(kComprDataSizeName),
    426                         &compr_data_size) ||
    427       !ReadFileToUint64(zram_dir.Append(kOrigDataSizeName), &orig_data_size) ||
    428       !ReadFileToUint64(zram_dir.Append(kZeroPagesName), &zero_pages)) {
    429     return false;
    430   }
    431 
    432   // |orig_data_size| does not include zero-filled pages.
    433   orig_data_size += zero_pages * page_size;
    434 
    435   const int compr_data_size_mb = compr_data_size >> 20;
    436   const int savings_mb = (orig_data_size - compr_data_size) >> 20;
    437   const int zero_ratio_percent = zero_pages * page_size * 100 / orig_data_size;
    438 
    439   // Report compressed size in megabytes.  100 MB or less has little impact.
    440   SendSample("Platform.ZramCompressedSize", compr_data_size_mb, 100, 4000, 50);
    441   SendSample("Platform.ZramSavings", savings_mb, 100, 4000, 50);
    442   // The compression ratio is multiplied by 100 for better resolution.  The
    443   // ratios of interest are between 1 and 6 (100% and 600% as reported).  We
    444   // don't want samples when very little memory is being compressed.
    445   if (compr_data_size_mb >= 1) {
    446     SendSample("Platform.ZramCompressionRatioPercent",
    447                orig_data_size * 100 / compr_data_size, 100, 600, 50);
    448   }
    449   // The values of interest for zero_pages are between 1MB and 1GB.  The units
    450   // are number of pages.
    451   SendSample("Platform.ZramZeroPages", zero_pages, 256, 256 * 1024, 50);
    452   SendSample("Platform.ZramZeroRatioPercent", zero_ratio_percent, 1, 50, 50);
    453 
    454   return true;
    455 }
    456 
    457 bool MetricsCollector::ProcessMeminfo(const string& meminfo_raw) {
    458   static const MeminfoRecord fields_array[] = {
    459     { "MemTotal", "MemTotal" },  // SPECIAL CASE: total system memory
    460     { "MemFree", "MemFree" },
    461     { "Buffers", "Buffers" },
    462     { "Cached", "Cached" },
    463     // { "SwapCached", "SwapCached" },
    464     { "Active", "Active" },
    465     { "Inactive", "Inactive" },
    466     { "ActiveAnon", "Active(anon)" },
    467     { "InactiveAnon", "Inactive(anon)" },
    468     { "ActiveFile" , "Active(file)" },
    469     { "InactiveFile", "Inactive(file)" },
    470     { "Unevictable", "Unevictable", kMeminfoOp_HistLog },
    471     // { "Mlocked", "Mlocked" },
    472     { "SwapTotal", "SwapTotal", kMeminfoOp_SwapTotal },
    473     { "SwapFree", "SwapFree", kMeminfoOp_SwapFree },
    474     // { "Dirty", "Dirty" },
    475     // { "Writeback", "Writeback" },
    476     { "AnonPages", "AnonPages" },
    477     { "Mapped", "Mapped" },
    478     { "Shmem", "Shmem", kMeminfoOp_HistLog },
    479     { "Slab", "Slab", kMeminfoOp_HistLog },
    480     // { "SReclaimable", "SReclaimable" },
    481     // { "SUnreclaim", "SUnreclaim" },
    482   };
    483   vector<MeminfoRecord> fields(fields_array,
    484                                fields_array + arraysize(fields_array));
    485   if (!FillMeminfo(meminfo_raw, &fields)) {
    486     return false;
    487   }
    488   int total_memory = fields[0].value;
    489   if (total_memory == 0) {
    490     // this "cannot happen"
    491     LOG(WARNING) << "borked meminfo parser";
    492     return false;
    493   }
    494   int swap_total = 0;
    495   int swap_free = 0;
    496   // Send all fields retrieved, except total memory.
    497   for (unsigned int i = 1; i < fields.size(); i++) {
    498     string metrics_name = base::StringPrintf("Platform.Meminfo%s",
    499                                              fields[i].name);
    500     int percent;
    501     switch (fields[i].op) {
    502       case kMeminfoOp_HistPercent:
    503         // report value as percent of total memory
    504         percent = fields[i].value * 100 / total_memory;
    505         SendLinearSample(metrics_name, percent, 100, 101);
    506         break;
    507       case kMeminfoOp_HistLog:
    508         // report value in kbytes, log scale, 4Gb max
    509         SendSample(metrics_name, fields[i].value, 1, 4 * 1000 * 1000, 100);
    510         break;
    511       case kMeminfoOp_SwapTotal:
    512         swap_total = fields[i].value;
    513       case kMeminfoOp_SwapFree:
    514         swap_free = fields[i].value;
    515         break;
    516     }
    517   }
    518   if (swap_total > 0) {
    519     int swap_used = swap_total - swap_free;
    520     int swap_used_percent = swap_used * 100 / swap_total;
    521     SendSample("Platform.MeminfoSwapUsed", swap_used, 1, 8 * 1000 * 1000, 100);
    522     SendLinearSample("Platform.MeminfoSwapUsed.Percent", swap_used_percent,
    523                      100, 101);
    524   }
    525   return true;
    526 }
    527 
    528 bool MetricsCollector::FillMeminfo(const string& meminfo_raw,
    529                                     vector<MeminfoRecord>* fields) {
    530   vector<std::string> lines =
    531       base::SplitString(meminfo_raw, "\n", base::KEEP_WHITESPACE,
    532                         base::SPLIT_WANT_NONEMPTY);
    533 
    534   // Scan meminfo output and collect field values.  Each field name has to
    535   // match a meminfo entry (case insensitive) after removing non-alpha
    536   // characters from the entry.
    537   size_t ifield = 0;
    538   for (size_t iline = 0;
    539        iline < lines.size() && ifield < fields->size();
    540        iline++) {
    541     vector<string> tokens =
    542         base::SplitString(lines[iline], ": ", base::KEEP_WHITESPACE,
    543                           base::SPLIT_WANT_NONEMPTY);
    544     if (strcmp((*fields)[ifield].match, tokens[0].c_str()) == 0) {
    545       // Name matches. Parse value and save.
    546       if (!base::StringToInt(tokens[1], &(*fields)[ifield].value)) {
    547         LOG(WARNING) << "Cound not convert " << tokens[1] << " to int";
    548         return false;
    549       }
    550       ifield++;
    551     }
    552   }
    553   if (ifield < fields->size()) {
    554     // End of input reached while scanning.
    555     LOG(WARNING) << "cannot find field " << (*fields)[ifield].match
    556                  << " and following";
    557     return false;
    558   }
    559   return true;
    560 }
    561 
    562 void MetricsCollector::ScheduleMemuseCallback(double interval) {
    563   if (testing_) {
    564     return;
    565   }
    566   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
    567       base::Bind(&MetricsCollector::MemuseCallback,
    568                  weak_ptr_factory_.GetWeakPtr()),
    569       base::TimeDelta::FromSeconds(interval));
    570 }
    571 
    572 void MetricsCollector::MemuseCallback() {
    573   // Since we only care about active time (i.e. uptime minus sleep time) but
    574   // the callbacks are driven by real time (uptime), we check if we should
    575   // reschedule this callback due to intervening sleep periods.
    576   double now = GetActiveTime();
    577   // Avoid intervals of less than one second.
    578   double remaining_time = ceil(memuse_final_time_ - now);
    579   if (remaining_time > 0) {
    580     ScheduleMemuseCallback(remaining_time);
    581   } else {
    582     // Report stats and advance the measurement interval unless there are
    583     // errors or we've completed the last interval.
    584     if (MemuseCallbackWork() &&
    585         memuse_interval_index_ < arraysize(kMemuseIntervals)) {
    586       double interval = kMemuseIntervals[memuse_interval_index_++];
    587       memuse_final_time_ = now + interval;
    588       ScheduleMemuseCallback(interval);
    589     }
    590   }
    591 }
    592 
    593 bool MetricsCollector::MemuseCallbackWork() {
    594   string meminfo_raw;
    595   const FilePath meminfo_path(kMeminfoFileName);
    596   if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
    597     LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
    598     return false;
    599   }
    600   return ProcessMemuse(meminfo_raw);
    601 }
    602 
    603 bool MetricsCollector::ProcessMemuse(const string& meminfo_raw) {
    604   static const MeminfoRecord fields_array[] = {
    605     { "MemTotal", "MemTotal" },  // SPECIAL CASE: total system memory
    606     { "ActiveAnon", "Active(anon)" },
    607     { "InactiveAnon", "Inactive(anon)" },
    608   };
    609   vector<MeminfoRecord> fields(fields_array,
    610                                fields_array + arraysize(fields_array));
    611   if (!FillMeminfo(meminfo_raw, &fields)) {
    612     return false;
    613   }
    614   int total = fields[0].value;
    615   int active_anon = fields[1].value;
    616   int inactive_anon = fields[2].value;
    617   if (total == 0) {
    618     // this "cannot happen"
    619     LOG(WARNING) << "borked meminfo parser";
    620     return false;
    621   }
    622   string metrics_name = base::StringPrintf("Platform.MemuseAnon%d",
    623                                            memuse_interval_index_);
    624   SendLinearSample(metrics_name, (active_anon + inactive_anon) * 100 / total,
    625                    100, 101);
    626   return true;
    627 }
    628 
// Sends |sample| to the histogram |name| through the metrics library.
// |min|, |max| and |nbuckets| are forwarded unchanged to SendToUMA().
void MetricsCollector::SendSample(const string& name, int sample,
                                   int min, int max, int nbuckets) {
  metrics_lib_->SendToUMA(name, sample, min, max, nbuckets);
}
    633 
    634 void MetricsCollector::SendKernelCrashesCumulativeCountStats() {
    635   // Report the number of crashes for this OS version, but don't clear the
    636   // counter.  It is cleared elsewhere on version change.
    637   int64_t crashes_count = kernel_crashes_version_count_->Get();
    638   SendSample(kernel_crashes_version_count_->Name(),
    639              crashes_count,
    640              1,                         // value of first bucket
    641              500,                       // value of last bucket
    642              100);                      // number of buckets
    643 
    644 
    645   int64_t cpu_use_ms = version_cumulative_cpu_use_->Get();
    646   SendSample(version_cumulative_cpu_use_->Name(),
    647              cpu_use_ms / 1000,         // stat is in seconds
    648              1,                         // device may be used very little...
    649              8 * 1000 * 1000,           // ... or a lot (a little over 90 days)
    650              100);
    651 
    652   // On the first run after an autoupdate, cpu_use_ms and active_use_seconds
    653   // can be zero.  Avoid division by zero.
    654   if (cpu_use_ms > 0) {
    655     // Send the crash frequency since update in number of crashes per CPU year.
    656     SendSample("Logging.KernelCrashesPerCpuYear",
    657                crashes_count * kSecondsPerDay * 365 * 1000 / cpu_use_ms,
    658                1,
    659                1000 * 1000,     // about one crash every 30s of CPU time
    660                100);
    661   }
    662 
    663   int64_t active_use_seconds = version_cumulative_active_use_->Get();
    664   if (active_use_seconds > 0) {
    665     SendSample(version_cumulative_active_use_->Name(),
    666                active_use_seconds,
    667                1,                          // device may be used very little...
    668                8 * 1000 * 1000,            // ... or a lot (about 90 days)
    669                100);
    670     // Same as above, but per year of active time.
    671     SendSample("Logging.KernelCrashesPerActiveYear",
    672                crashes_count * kSecondsPerDay * 365 / active_use_seconds,
    673                1,
    674                1000 * 1000,     // about one crash every 30s of active time
    675                100);
    676   }
    677 }
    678 
    679 void MetricsCollector::SendAndResetDailyUseSample(
    680     const unique_ptr<PersistentInteger>& use) {
    681   SendSample(use->Name(),
    682              use->GetAndClear(),
    683              1,                        // value of first bucket
    684              kSecondsPerDay,           // value of last bucket
    685              50);                      // number of buckets
    686 }
    687 
    688 void MetricsCollector::SendAndResetCrashIntervalSample(
    689     const unique_ptr<PersistentInteger>& interval) {
    690   SendSample(interval->Name(),
    691              interval->GetAndClear(),
    692              1,                        // value of first bucket
    693              4 * kSecondsPerWeek,      // value of last bucket
    694              50);                      // number of buckets
    695 }
    696 
    697 void MetricsCollector::SendAndResetCrashFrequencySample(
    698     const unique_ptr<PersistentInteger>& frequency) {
    699   SendSample(frequency->Name(),
    700              frequency->GetAndClear(),
    701              1,                        // value of first bucket
    702              100,                      // value of last bucket
    703              50);                      // number of buckets
    704 }
    705 
    706 void MetricsCollector::SendLinearSample(const string& name, int sample,
    707                                          int max, int nbuckets) {
    708   // TODO(semenzato): add a proper linear histogram to the Chrome external
    709   // metrics API.
    710   LOG_IF(FATAL, nbuckets != max + 1) << "unsupported histogram scale";
    711   metrics_lib_->SendEnumToUMA(name, sample, max);
    712 }
    713 
    714 void MetricsCollector::UpdateStats(TimeTicks now_ticks,
    715                                     Time now_wall_time) {
    716   const int elapsed_seconds = (now_ticks - last_update_stats_time_).InSeconds();
    717   daily_active_use_->Add(elapsed_seconds);
    718   version_cumulative_active_use_->Add(elapsed_seconds);
    719   user_crash_interval_->Add(elapsed_seconds);
    720   kernel_crash_interval_->Add(elapsed_seconds);
    721   TimeDelta cpu_use = cpu_usage_collector_->GetCumulativeCpuUse();
    722   version_cumulative_cpu_use_->Add(
    723       (cpu_use - latest_cpu_use_microseconds_).InMilliseconds());
    724   latest_cpu_use_microseconds_ = cpu_use;
    725   last_update_stats_time_ = now_ticks;
    726 
    727   const TimeDelta since_epoch = now_wall_time - Time::UnixEpoch();
    728   const int day = since_epoch.InDays();
    729   const int week = day / 7;
    730 
    731   if (daily_cycle_->Get() != day) {
    732     daily_cycle_->Set(day);
    733     SendAndResetDailyUseSample(daily_active_use_);
    734     SendAndResetCrashFrequencySample(any_crashes_daily_count_);
    735     SendAndResetCrashFrequencySample(user_crashes_daily_count_);
    736     SendAndResetCrashFrequencySample(kernel_crashes_daily_count_);
    737     SendAndResetCrashFrequencySample(unclean_shutdowns_daily_count_);
    738     SendKernelCrashesCumulativeCountStats();
    739   }
    740 
    741   if (weekly_cycle_->Get() != week) {
    742     weekly_cycle_->Set(week);
    743     SendAndResetCrashFrequencySample(any_crashes_weekly_count_);
    744     SendAndResetCrashFrequencySample(user_crashes_weekly_count_);
    745     SendAndResetCrashFrequencySample(kernel_crashes_weekly_count_);
    746     SendAndResetCrashFrequencySample(unclean_shutdowns_weekly_count_);
    747   }
    748 }
    749 
    750 void MetricsCollector::HandleUpdateStatsTimeout() {
    751   UpdateStats(TimeTicks::Now(), Time::Now());
    752   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
    753       base::Bind(&MetricsCollector::HandleUpdateStatsTimeout,
    754                  weak_ptr_factory_.GetWeakPtr()),
    755       base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs));
    756 }
    757