1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "metrics_collector.h" 18 19 #include <sysexits.h> 20 #include <time.h> 21 22 #include <memory> 23 24 #include <base/bind.h> 25 #include <base/files/file_path.h> 26 #include <base/files/file_util.h> 27 #include <base/hash.h> 28 #include <base/logging.h> 29 #include <base/strings/string_number_conversions.h> 30 #include <base/strings/string_split.h> 31 #include <base/strings/string_util.h> 32 #include <base/strings/stringprintf.h> 33 #include <brillo/binder_watcher.h> 34 #include <brillo/osrelease_reader.h> 35 36 #include "constants.h" 37 #include "metrics_collector_service_impl.h" 38 39 using base::FilePath; 40 using base::StringPrintf; 41 using base::Time; 42 using base::TimeDelta; 43 using base::TimeTicks; 44 using chromeos_metrics::PersistentInteger; 45 using std::map; 46 using std::string; 47 using std::vector; 48 49 namespace { 50 51 const int kSecondsPerMinute = 60; 52 const int kMinutesPerHour = 60; 53 const int kHoursPerDay = 24; 54 const int kMinutesPerDay = kHoursPerDay * kMinutesPerHour; 55 const int kSecondsPerDay = kSecondsPerMinute * kMinutesPerDay; 56 const int kDaysPerWeek = 7; 57 const int kSecondsPerWeek = kSecondsPerDay * kDaysPerWeek; 58 59 // Interval between calls to UpdateStats(). 60 const uint32_t kUpdateStatsIntervalMs = 300000; 61 62 const char kKernelCrashDetectedFile[] = 63 "/data/misc/crash_reporter/run/kernel-crash-detected"; 64 const char kUncleanShutdownDetectedFile[] = 65 "/var/run/unclean-shutdown-detected"; 66 67 const int kMetricMeminfoInterval = 30; // seconds 68 69 const char kMeminfoFileName[] = "/proc/meminfo"; 70 const char kVmStatFileName[] = "/proc/vmstat"; 71 72 const char kWeaveComponent[] = "metrics"; 73 const char kWeaveTrait[] = "_metrics"; 74 75 } // namespace 76 77 // Zram sysfs entries. 78 79 const char MetricsCollector::kComprDataSizeName[] = "compr_data_size"; 80 const char MetricsCollector::kOrigDataSizeName[] = "orig_data_size"; 81 const char MetricsCollector::kZeroPagesName[] = "zero_pages"; 82 83 // Memory use stats collection intervals. We collect some memory use interval 84 // at these intervals after boot, and we stop collecting after the last one, 85 // with the assumption that in most cases the memory use won't change much 86 // after that. 87 static const int kMemuseIntervals[] = { 88 1 * kSecondsPerMinute, // 1 minute mark 89 4 * kSecondsPerMinute, // 5 minute mark 90 25 * kSecondsPerMinute, // 0.5 hour mark 91 120 * kSecondsPerMinute, // 2.5 hour mark 92 600 * kSecondsPerMinute, // 12.5 hour mark 93 }; 94 95 MetricsCollector::MetricsCollector() 96 : memuse_final_time_(0), 97 memuse_interval_index_(0) {} 98 99 MetricsCollector::~MetricsCollector() { 100 } 101 102 // static 103 double MetricsCollector::GetActiveTime() { 104 struct timespec ts; 105 int r = clock_gettime(CLOCK_MONOTONIC, &ts); 106 if (r < 0) { 107 PLOG(WARNING) << "clock_gettime(CLOCK_MONOTONIC) failed"; 108 return 0; 109 } else { 110 return ts.tv_sec + static_cast<double>(ts.tv_nsec) / (1000 * 1000 * 1000); 111 } 112 } 113 114 int MetricsCollector::Run() { 115 if (CheckSystemCrash(kKernelCrashDetectedFile)) { 116 ProcessKernelCrash(); 117 } 118 119 if (CheckSystemCrash(kUncleanShutdownDetectedFile)) { 120 ProcessUncleanShutdown(); 121 } 122 123 // On OS version change, clear version stats (which are reported daily). 124 int32_t version = GetOsVersionHash(); 125 if (version_cycle_->Get() != version) { 126 version_cycle_->Set(version); 127 kernel_crashes_version_count_->Set(0); 128 version_cumulative_active_use_->Set(0); 129 version_cumulative_cpu_use_->Set(0); 130 } 131 132 // Start metricscollectorservice 133 android::sp<BnMetricsCollectorServiceImpl> metrics_collector_service = 134 new BnMetricsCollectorServiceImpl(this); 135 android::status_t status = android::defaultServiceManager()->addService( 136 metrics_collector_service->getInterfaceDescriptor(), 137 metrics_collector_service); 138 CHECK(status == android::OK) 139 << "failed to register service metricscollectorservice"; 140 141 // Watch Binder events in the main loop 142 brillo::BinderWatcher binder_watcher; 143 CHECK(binder_watcher.Init()) << "Binder FD watcher init failed"; 144 return brillo::Daemon::Run(); 145 } 146 147 uint32_t MetricsCollector::GetOsVersionHash() { 148 brillo::OsReleaseReader reader; 149 reader.Load(); 150 string version; 151 if (!reader.GetString(metrics::kProductVersion, &version)) { 152 LOG(ERROR) << "failed to read the product version."; 153 version = metrics::kDefaultVersion; 154 } 155 156 uint32_t version_hash = base::Hash(version); 157 if (testing_) { 158 version_hash = 42; // return any plausible value for the hash 159 } 160 return version_hash; 161 } 162 163 void MetricsCollector::Init(bool testing, MetricsLibraryInterface* metrics_lib, 164 const string& diskstats_path, 165 const base::FilePath& private_metrics_directory, 166 const base::FilePath& shared_metrics_directory) { 167 CHECK(metrics_lib); 168 testing_ = testing; 169 shared_metrics_directory_ = shared_metrics_directory; 170 metrics_lib_ = metrics_lib; 171 172 daily_active_use_.reset(new PersistentInteger("Platform.UseTime.PerDay", 173 private_metrics_directory)); 174 version_cumulative_active_use_.reset(new PersistentInteger( 175 "Platform.CumulativeUseTime", private_metrics_directory)); 176 version_cumulative_cpu_use_.reset(new PersistentInteger( 177 "Platform.CumulativeCpuTime", private_metrics_directory)); 178 179 kernel_crash_interval_.reset(new PersistentInteger( 180 "Platform.KernelCrashInterval", private_metrics_directory)); 181 unclean_shutdown_interval_.reset(new PersistentInteger( 182 "Platform.UncleanShutdownInterval", private_metrics_directory)); 183 user_crash_interval_.reset(new PersistentInteger("Platform.UserCrashInterval", 184 private_metrics_directory)); 185 186 any_crashes_daily_count_.reset(new PersistentInteger( 187 "Platform.AnyCrashes.PerDay", private_metrics_directory)); 188 any_crashes_weekly_count_.reset(new PersistentInteger( 189 "Platform.AnyCrashes.PerWeek", private_metrics_directory)); 190 user_crashes_daily_count_.reset(new PersistentInteger( 191 "Platform.UserCrashes.PerDay", private_metrics_directory)); 192 user_crashes_weekly_count_.reset(new PersistentInteger( 193 "Platform.UserCrashes.PerWeek", private_metrics_directory)); 194 kernel_crashes_daily_count_.reset(new PersistentInteger( 195 "Platform.KernelCrashes.PerDay", private_metrics_directory)); 196 kernel_crashes_weekly_count_.reset(new PersistentInteger( 197 "Platform.KernelCrashes.PerWeek", private_metrics_directory)); 198 kernel_crashes_version_count_.reset(new PersistentInteger( 199 "Platform.KernelCrashesSinceUpdate", private_metrics_directory)); 200 unclean_shutdowns_daily_count_.reset(new PersistentInteger( 201 "Platform.UncleanShutdown.PerDay", private_metrics_directory)); 202 unclean_shutdowns_weekly_count_.reset(new PersistentInteger( 203 "Platform.UncleanShutdowns.PerWeek", private_metrics_directory)); 204 205 daily_cycle_.reset( 206 new PersistentInteger("daily.cycle", private_metrics_directory)); 207 weekly_cycle_.reset( 208 new PersistentInteger("weekly.cycle", private_metrics_directory)); 209 version_cycle_.reset( 210 new PersistentInteger("version.cycle", private_metrics_directory)); 211 212 disk_usage_collector_.reset(new DiskUsageCollector(metrics_lib_)); 213 averaged_stats_collector_.reset( 214 new AveragedStatisticsCollector(metrics_lib_, diskstats_path, 215 kVmStatFileName)); 216 cpu_usage_collector_.reset(new CpuUsageCollector(metrics_lib_)); 217 } 218 219 int MetricsCollector::OnInit() { 220 int return_code = brillo::Daemon::OnInit(); 221 if (return_code != EX_OK) 222 return return_code; 223 224 StatsReporterInit(); 225 226 // Start collecting meminfo stats. 227 ScheduleMeminfoCallback(kMetricMeminfoInterval); 228 memuse_final_time_ = GetActiveTime() + kMemuseIntervals[0]; 229 ScheduleMemuseCallback(kMemuseIntervals[0]); 230 231 if (testing_) 232 return EX_OK; 233 234 weave_service_subscription_ = weaved::Service::Connect( 235 brillo::MessageLoop::current(), 236 base::Bind(&MetricsCollector::OnWeaveServiceConnected, 237 weak_ptr_factory_.GetWeakPtr())); 238 239 latest_cpu_use_microseconds_ = cpu_usage_collector_->GetCumulativeCpuUse(); 240 base::MessageLoop::current()->PostDelayedTask(FROM_HERE, 241 base::Bind(&MetricsCollector::HandleUpdateStatsTimeout, 242 weak_ptr_factory_.GetWeakPtr()), 243 base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs)); 244 245 return EX_OK; 246 } 247 248 void MetricsCollector::OnWeaveServiceConnected( 249 const std::weak_ptr<weaved::Service>& service) { 250 service_ = service; 251 auto weave_service = service_.lock(); 252 if (!weave_service) 253 return; 254 255 weave_service->AddComponent(kWeaveComponent, {kWeaveTrait}, nullptr); 256 weave_service->AddCommandHandler( 257 kWeaveComponent, kWeaveTrait, "enableAnalyticsReporting", 258 base::Bind(&MetricsCollector::OnEnableMetrics, 259 weak_ptr_factory_.GetWeakPtr())); 260 weave_service->AddCommandHandler( 261 kWeaveComponent, kWeaveTrait, "disableAnalyticsReporting", 262 base::Bind(&MetricsCollector::OnDisableMetrics, 263 weak_ptr_factory_.GetWeakPtr())); 264 265 UpdateWeaveState(); 266 } 267 268 void MetricsCollector::OnEnableMetrics( 269 std::unique_ptr<weaved::Command> command) { 270 if (base::WriteFile( 271 shared_metrics_directory_.Append(metrics::kConsentFileName), "", 0) != 272 0) { 273 PLOG(ERROR) << "Could not create the consent file."; 274 command->Abort("metrics_error", "Could not create the consent file", 275 nullptr); 276 return; 277 } 278 279 UpdateWeaveState(); 280 command->Complete({}, nullptr); 281 } 282 283 void MetricsCollector::OnDisableMetrics( 284 std::unique_ptr<weaved::Command> command) { 285 if (!base::DeleteFile( 286 shared_metrics_directory_.Append(metrics::kConsentFileName), false)) { 287 PLOG(ERROR) << "Could not delete the consent file."; 288 command->Abort("metrics_error", "Could not delete the consent file", 289 nullptr); 290 return; 291 } 292 293 UpdateWeaveState(); 294 command->Complete({}, nullptr); 295 } 296 297 void MetricsCollector::UpdateWeaveState() { 298 auto weave_service = service_.lock(); 299 if (!weave_service) 300 return; 301 302 std::string enabled = 303 metrics_lib_->AreMetricsEnabled() ? "enabled" : "disabled"; 304 305 if (!weave_service->SetStateProperty(kWeaveComponent, kWeaveTrait, 306 "analyticsReportingState", 307 *brillo::ToValue(enabled), 308 nullptr)) { 309 LOG(ERROR) << "failed to update weave's state"; 310 } 311 } 312 313 void MetricsCollector::ProcessUserCrash() { 314 // Counts the active time up to now. 315 UpdateStats(TimeTicks::Now(), Time::Now()); 316 317 // Reports the active use time since the last crash and resets it. 318 SendAndResetCrashIntervalSample(user_crash_interval_); 319 320 any_crashes_daily_count_->Add(1); 321 any_crashes_weekly_count_->Add(1); 322 user_crashes_daily_count_->Add(1); 323 user_crashes_weekly_count_->Add(1); 324 } 325 326 void MetricsCollector::ProcessKernelCrash() { 327 // Counts the active time up to now. 328 UpdateStats(TimeTicks::Now(), Time::Now()); 329 330 // Reports the active use time since the last crash and resets it. 331 SendAndResetCrashIntervalSample(kernel_crash_interval_); 332 333 any_crashes_daily_count_->Add(1); 334 any_crashes_weekly_count_->Add(1); 335 kernel_crashes_daily_count_->Add(1); 336 kernel_crashes_weekly_count_->Add(1); 337 338 kernel_crashes_version_count_->Add(1); 339 } 340 341 void MetricsCollector::ProcessUncleanShutdown() { 342 // Counts the active time up to now. 343 UpdateStats(TimeTicks::Now(), Time::Now()); 344 345 // Reports the active use time since the last crash and resets it. 346 SendAndResetCrashIntervalSample(unclean_shutdown_interval_); 347 348 unclean_shutdowns_daily_count_->Add(1); 349 unclean_shutdowns_weekly_count_->Add(1); 350 any_crashes_daily_count_->Add(1); 351 any_crashes_weekly_count_->Add(1); 352 } 353 354 bool MetricsCollector::CheckSystemCrash(const string& crash_file) { 355 FilePath crash_detected(crash_file); 356 if (!base::PathExists(crash_detected)) 357 return false; 358 359 // Deletes the crash-detected file so that the daemon doesn't report 360 // another kernel crash in case it's restarted. 361 base::DeleteFile(crash_detected, false); // not recursive 362 return true; 363 } 364 365 void MetricsCollector::StatsReporterInit() { 366 disk_usage_collector_->Schedule(); 367 368 cpu_usage_collector_->Init(); 369 cpu_usage_collector_->Schedule(); 370 371 // Don't start a collection cycle during the first run to avoid delaying the 372 // boot. 373 averaged_stats_collector_->ScheduleWait(); 374 } 375 376 void MetricsCollector::ScheduleMeminfoCallback(int wait) { 377 if (testing_) { 378 return; 379 } 380 base::TimeDelta waitDelta = base::TimeDelta::FromSeconds(wait); 381 base::MessageLoop::current()->PostDelayedTask(FROM_HERE, 382 base::Bind(&MetricsCollector::MeminfoCallback, 383 weak_ptr_factory_.GetWeakPtr(), waitDelta), 384 waitDelta); 385 } 386 387 void MetricsCollector::MeminfoCallback(base::TimeDelta wait) { 388 string meminfo_raw; 389 const FilePath meminfo_path(kMeminfoFileName); 390 if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) { 391 LOG(WARNING) << "cannot read " << meminfo_path.value().c_str(); 392 return; 393 } 394 // Make both calls even if the first one fails. 395 if (ProcessMeminfo(meminfo_raw)) { 396 base::MessageLoop::current()->PostDelayedTask(FROM_HERE, 397 base::Bind(&MetricsCollector::MeminfoCallback, 398 weak_ptr_factory_.GetWeakPtr(), wait), 399 wait); 400 } 401 } 402 403 // static 404 bool MetricsCollector::ReadFileToUint64(const base::FilePath& path, 405 uint64_t* value) { 406 std::string content; 407 if (!base::ReadFileToString(path, &content)) { 408 PLOG(WARNING) << "cannot read " << path.MaybeAsASCII(); 409 return false; 410 } 411 // Remove final newline. 412 base::TrimWhitespaceASCII(content, base::TRIM_TRAILING, &content); 413 if (!base::StringToUint64(content, value)) { 414 LOG(WARNING) << "invalid integer: " << content; 415 return false; 416 } 417 return true; 418 } 419 420 bool MetricsCollector::ReportZram(const base::FilePath& zram_dir) { 421 // Data sizes are in bytes. |zero_pages| is in number of pages. 422 uint64_t compr_data_size, orig_data_size, zero_pages; 423 const size_t page_size = 4096; 424 425 if (!ReadFileToUint64(zram_dir.Append(kComprDataSizeName), 426 &compr_data_size) || 427 !ReadFileToUint64(zram_dir.Append(kOrigDataSizeName), &orig_data_size) || 428 !ReadFileToUint64(zram_dir.Append(kZeroPagesName), &zero_pages)) { 429 return false; 430 } 431 432 // |orig_data_size| does not include zero-filled pages. 433 orig_data_size += zero_pages * page_size; 434 435 const int compr_data_size_mb = compr_data_size >> 20; 436 const int savings_mb = (orig_data_size - compr_data_size) >> 20; 437 const int zero_ratio_percent = zero_pages * page_size * 100 / orig_data_size; 438 439 // Report compressed size in megabytes. 100 MB or less has little impact. 440 SendSample("Platform.ZramCompressedSize", compr_data_size_mb, 100, 4000, 50); 441 SendSample("Platform.ZramSavings", savings_mb, 100, 4000, 50); 442 // The compression ratio is multiplied by 100 for better resolution. The 443 // ratios of interest are between 1 and 6 (100% and 600% as reported). We 444 // don't want samples when very little memory is being compressed. 445 if (compr_data_size_mb >= 1) { 446 SendSample("Platform.ZramCompressionRatioPercent", 447 orig_data_size * 100 / compr_data_size, 100, 600, 50); 448 } 449 // The values of interest for zero_pages are between 1MB and 1GB. The units 450 // are number of pages. 451 SendSample("Platform.ZramZeroPages", zero_pages, 256, 256 * 1024, 50); 452 SendSample("Platform.ZramZeroRatioPercent", zero_ratio_percent, 1, 50, 50); 453 454 return true; 455 } 456 457 bool MetricsCollector::ProcessMeminfo(const string& meminfo_raw) { 458 static const MeminfoRecord fields_array[] = { 459 { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory 460 { "MemFree", "MemFree" }, 461 { "Buffers", "Buffers" }, 462 { "Cached", "Cached" }, 463 // { "SwapCached", "SwapCached" }, 464 { "Active", "Active" }, 465 { "Inactive", "Inactive" }, 466 { "ActiveAnon", "Active(anon)" }, 467 { "InactiveAnon", "Inactive(anon)" }, 468 { "ActiveFile" , "Active(file)" }, 469 { "InactiveFile", "Inactive(file)" }, 470 { "Unevictable", "Unevictable", kMeminfoOp_HistLog }, 471 // { "Mlocked", "Mlocked" }, 472 { "SwapTotal", "SwapTotal", kMeminfoOp_SwapTotal }, 473 { "SwapFree", "SwapFree", kMeminfoOp_SwapFree }, 474 // { "Dirty", "Dirty" }, 475 // { "Writeback", "Writeback" }, 476 { "AnonPages", "AnonPages" }, 477 { "Mapped", "Mapped" }, 478 { "Shmem", "Shmem", kMeminfoOp_HistLog }, 479 { "Slab", "Slab", kMeminfoOp_HistLog }, 480 // { "SReclaimable", "SReclaimable" }, 481 // { "SUnreclaim", "SUnreclaim" }, 482 }; 483 vector<MeminfoRecord> fields(fields_array, 484 fields_array + arraysize(fields_array)); 485 if (!FillMeminfo(meminfo_raw, &fields)) { 486 return false; 487 } 488 int total_memory = fields[0].value; 489 if (total_memory == 0) { 490 // this "cannot happen" 491 LOG(WARNING) << "borked meminfo parser"; 492 return false; 493 } 494 int swap_total = 0; 495 int swap_free = 0; 496 // Send all fields retrieved, except total memory. 497 for (unsigned int i = 1; i < fields.size(); i++) { 498 string metrics_name = base::StringPrintf("Platform.Meminfo%s", 499 fields[i].name); 500 int percent; 501 switch (fields[i].op) { 502 case kMeminfoOp_HistPercent: 503 // report value as percent of total memory 504 percent = fields[i].value * 100 / total_memory; 505 SendLinearSample(metrics_name, percent, 100, 101); 506 break; 507 case kMeminfoOp_HistLog: 508 // report value in kbytes, log scale, 4Gb max 509 SendSample(metrics_name, fields[i].value, 1, 4 * 1000 * 1000, 100); 510 break; 511 case kMeminfoOp_SwapTotal: 512 swap_total = fields[i].value; 513 case kMeminfoOp_SwapFree: 514 swap_free = fields[i].value; 515 break; 516 } 517 } 518 if (swap_total > 0) { 519 int swap_used = swap_total - swap_free; 520 int swap_used_percent = swap_used * 100 / swap_total; 521 SendSample("Platform.MeminfoSwapUsed", swap_used, 1, 8 * 1000 * 1000, 100); 522 SendLinearSample("Platform.MeminfoSwapUsed.Percent", swap_used_percent, 523 100, 101); 524 } 525 return true; 526 } 527 528 bool MetricsCollector::FillMeminfo(const string& meminfo_raw, 529 vector<MeminfoRecord>* fields) { 530 vector<std::string> lines = 531 base::SplitString(meminfo_raw, "\n", base::KEEP_WHITESPACE, 532 base::SPLIT_WANT_NONEMPTY); 533 534 // Scan meminfo output and collect field values. Each field name has to 535 // match a meminfo entry (case insensitive) after removing non-alpha 536 // characters from the entry. 537 size_t ifield = 0; 538 for (size_t iline = 0; 539 iline < lines.size() && ifield < fields->size(); 540 iline++) { 541 vector<string> tokens = 542 base::SplitString(lines[iline], ": ", base::KEEP_WHITESPACE, 543 base::SPLIT_WANT_NONEMPTY); 544 if (strcmp((*fields)[ifield].match, tokens[0].c_str()) == 0) { 545 // Name matches. Parse value and save. 546 if (!base::StringToInt(tokens[1], &(*fields)[ifield].value)) { 547 LOG(WARNING) << "Cound not convert " << tokens[1] << " to int"; 548 return false; 549 } 550 ifield++; 551 } 552 } 553 if (ifield < fields->size()) { 554 // End of input reached while scanning. 555 LOG(WARNING) << "cannot find field " << (*fields)[ifield].match 556 << " and following"; 557 return false; 558 } 559 return true; 560 } 561 562 void MetricsCollector::ScheduleMemuseCallback(double interval) { 563 if (testing_) { 564 return; 565 } 566 base::MessageLoop::current()->PostDelayedTask(FROM_HERE, 567 base::Bind(&MetricsCollector::MemuseCallback, 568 weak_ptr_factory_.GetWeakPtr()), 569 base::TimeDelta::FromSeconds(interval)); 570 } 571 572 void MetricsCollector::MemuseCallback() { 573 // Since we only care about active time (i.e. uptime minus sleep time) but 574 // the callbacks are driven by real time (uptime), we check if we should 575 // reschedule this callback due to intervening sleep periods. 576 double now = GetActiveTime(); 577 // Avoid intervals of less than one second. 578 double remaining_time = ceil(memuse_final_time_ - now); 579 if (remaining_time > 0) { 580 ScheduleMemuseCallback(remaining_time); 581 } else { 582 // Report stats and advance the measurement interval unless there are 583 // errors or we've completed the last interval. 584 if (MemuseCallbackWork() && 585 memuse_interval_index_ < arraysize(kMemuseIntervals)) { 586 double interval = kMemuseIntervals[memuse_interval_index_++]; 587 memuse_final_time_ = now + interval; 588 ScheduleMemuseCallback(interval); 589 } 590 } 591 } 592 593 bool MetricsCollector::MemuseCallbackWork() { 594 string meminfo_raw; 595 const FilePath meminfo_path(kMeminfoFileName); 596 if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) { 597 LOG(WARNING) << "cannot read " << meminfo_path.value().c_str(); 598 return false; 599 } 600 return ProcessMemuse(meminfo_raw); 601 } 602 603 bool MetricsCollector::ProcessMemuse(const string& meminfo_raw) { 604 static const MeminfoRecord fields_array[] = { 605 { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory 606 { "ActiveAnon", "Active(anon)" }, 607 { "InactiveAnon", "Inactive(anon)" }, 608 }; 609 vector<MeminfoRecord> fields(fields_array, 610 fields_array + arraysize(fields_array)); 611 if (!FillMeminfo(meminfo_raw, &fields)) { 612 return false; 613 } 614 int total = fields[0].value; 615 int active_anon = fields[1].value; 616 int inactive_anon = fields[2].value; 617 if (total == 0) { 618 // this "cannot happen" 619 LOG(WARNING) << "borked meminfo parser"; 620 return false; 621 } 622 string metrics_name = base::StringPrintf("Platform.MemuseAnon%d", 623 memuse_interval_index_); 624 SendLinearSample(metrics_name, (active_anon + inactive_anon) * 100 / total, 625 100, 101); 626 return true; 627 } 628 629 void MetricsCollector::SendSample(const string& name, int sample, 630 int min, int max, int nbuckets) { 631 metrics_lib_->SendToUMA(name, sample, min, max, nbuckets); 632 } 633 634 void MetricsCollector::SendKernelCrashesCumulativeCountStats() { 635 // Report the number of crashes for this OS version, but don't clear the 636 // counter. It is cleared elsewhere on version change. 637 int64_t crashes_count = kernel_crashes_version_count_->Get(); 638 SendSample(kernel_crashes_version_count_->Name(), 639 crashes_count, 640 1, // value of first bucket 641 500, // value of last bucket 642 100); // number of buckets 643 644 645 int64_t cpu_use_ms = version_cumulative_cpu_use_->Get(); 646 SendSample(version_cumulative_cpu_use_->Name(), 647 cpu_use_ms / 1000, // stat is in seconds 648 1, // device may be used very little... 649 8 * 1000 * 1000, // ... or a lot (a little over 90 days) 650 100); 651 652 // On the first run after an autoupdate, cpu_use_ms and active_use_seconds 653 // can be zero. Avoid division by zero. 654 if (cpu_use_ms > 0) { 655 // Send the crash frequency since update in number of crashes per CPU year. 656 SendSample("Logging.KernelCrashesPerCpuYear", 657 crashes_count * kSecondsPerDay * 365 * 1000 / cpu_use_ms, 658 1, 659 1000 * 1000, // about one crash every 30s of CPU time 660 100); 661 } 662 663 int64_t active_use_seconds = version_cumulative_active_use_->Get(); 664 if (active_use_seconds > 0) { 665 SendSample(version_cumulative_active_use_->Name(), 666 active_use_seconds, 667 1, // device may be used very little... 668 8 * 1000 * 1000, // ... or a lot (about 90 days) 669 100); 670 // Same as above, but per year of active time. 671 SendSample("Logging.KernelCrashesPerActiveYear", 672 crashes_count * kSecondsPerDay * 365 / active_use_seconds, 673 1, 674 1000 * 1000, // about one crash every 30s of active time 675 100); 676 } 677 } 678 679 void MetricsCollector::SendAndResetDailyUseSample( 680 const unique_ptr<PersistentInteger>& use) { 681 SendSample(use->Name(), 682 use->GetAndClear(), 683 1, // value of first bucket 684 kSecondsPerDay, // value of last bucket 685 50); // number of buckets 686 } 687 688 void MetricsCollector::SendAndResetCrashIntervalSample( 689 const unique_ptr<PersistentInteger>& interval) { 690 SendSample(interval->Name(), 691 interval->GetAndClear(), 692 1, // value of first bucket 693 4 * kSecondsPerWeek, // value of last bucket 694 50); // number of buckets 695 } 696 697 void MetricsCollector::SendAndResetCrashFrequencySample( 698 const unique_ptr<PersistentInteger>& frequency) { 699 SendSample(frequency->Name(), 700 frequency->GetAndClear(), 701 1, // value of first bucket 702 100, // value of last bucket 703 50); // number of buckets 704 } 705 706 void MetricsCollector::SendLinearSample(const string& name, int sample, 707 int max, int nbuckets) { 708 // TODO(semenzato): add a proper linear histogram to the Chrome external 709 // metrics API. 710 LOG_IF(FATAL, nbuckets != max + 1) << "unsupported histogram scale"; 711 metrics_lib_->SendEnumToUMA(name, sample, max); 712 } 713 714 void MetricsCollector::UpdateStats(TimeTicks now_ticks, 715 Time now_wall_time) { 716 const int elapsed_seconds = (now_ticks - last_update_stats_time_).InSeconds(); 717 daily_active_use_->Add(elapsed_seconds); 718 version_cumulative_active_use_->Add(elapsed_seconds); 719 user_crash_interval_->Add(elapsed_seconds); 720 kernel_crash_interval_->Add(elapsed_seconds); 721 TimeDelta cpu_use = cpu_usage_collector_->GetCumulativeCpuUse(); 722 version_cumulative_cpu_use_->Add( 723 (cpu_use - latest_cpu_use_microseconds_).InMilliseconds()); 724 latest_cpu_use_microseconds_ = cpu_use; 725 last_update_stats_time_ = now_ticks; 726 727 const TimeDelta since_epoch = now_wall_time - Time::UnixEpoch(); 728 const int day = since_epoch.InDays(); 729 const int week = day / 7; 730 731 if (daily_cycle_->Get() != day) { 732 daily_cycle_->Set(day); 733 SendAndResetDailyUseSample(daily_active_use_); 734 SendAndResetCrashFrequencySample(any_crashes_daily_count_); 735 SendAndResetCrashFrequencySample(user_crashes_daily_count_); 736 SendAndResetCrashFrequencySample(kernel_crashes_daily_count_); 737 SendAndResetCrashFrequencySample(unclean_shutdowns_daily_count_); 738 SendKernelCrashesCumulativeCountStats(); 739 } 740 741 if (weekly_cycle_->Get() != week) { 742 weekly_cycle_->Set(week); 743 SendAndResetCrashFrequencySample(any_crashes_weekly_count_); 744 SendAndResetCrashFrequencySample(user_crashes_weekly_count_); 745 SendAndResetCrashFrequencySample(kernel_crashes_weekly_count_); 746 SendAndResetCrashFrequencySample(unclean_shutdowns_weekly_count_); 747 } 748 } 749 750 void MetricsCollector::HandleUpdateStatsTimeout() { 751 UpdateStats(TimeTicks::Now(), Time::Now()); 752 base::MessageLoop::current()->PostDelayedTask(FROM_HERE, 753 base::Bind(&MetricsCollector::HandleUpdateStatsTimeout, 754 weak_ptr_factory_.GetWeakPtr()), 755 base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs)); 756 } 757