Home | History | Annotate | Download | only in metricsd
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef METRICS_METRICS_COLLECTOR_H_
     18 #define METRICS_METRICS_COLLECTOR_H_
     19 
     20 #include <stdint.h>
     21 
     22 #include <map>
     23 #include <memory>
     24 #include <string>
     25 #include <vector>
     26 
     27 #include <base/files/file_path.h>
     28 #include <base/memory/weak_ptr.h>
     29 #include <base/time/time.h>
     30 #include <brillo/binder_watcher.h>
     31 #include <brillo/daemons/daemon.h>
     32 #include <libweaved/command.h>
     33 #include <libweaved/service.h>
     34 #include <gtest/gtest_prod.h>  // for FRIEND_TEST
     35 
     36 #include "collectors/averaged_statistics_collector.h"
     37 #include "collectors/cpu_usage_collector.h"
     38 #include "collectors/disk_usage_collector.h"
     39 #include "metrics/metrics_library.h"
     40 #include "persistent_integer.h"
     41 
     42 using chromeos_metrics::PersistentInteger;
     43 using std::unique_ptr;
     44 
     45 class MetricsCollector : public brillo::Daemon {
     46  public:
     47   MetricsCollector();
     48   ~MetricsCollector();
     49 
     50   // Initializes metrics class variables.
     51   void Init(bool testing,
     52             MetricsLibraryInterface* metrics_lib,
     53             const std::string& diskstats_path,
     54             const base::FilePath& private_metrics_directory,
     55             const base::FilePath& shared_metrics_directory);
     56 
     57   // Initializes the daemon.
     58   int OnInit() override;
     59 
     60   // Does all the work.
     61   int Run() override;
     62 
     63   // Returns the active time since boot (uptime minus sleep time) in seconds.
     64   static double GetActiveTime();
     65 
     66   // Updates the active use time and logs time between user-space
     67   // process crashes.  Called via MetricsCollectorServiceTrampoline.
     68   void ProcessUserCrash();
     69 
     70  protected:
     71   // Used also by the unit tests.
     72   static const char kComprDataSizeName[];
     73   static const char kOrigDataSizeName[];
     74   static const char kZeroPagesName[];
     75 
     76  private:
     77   friend class MetricsCollectorTest;
     78   FRIEND_TEST(MetricsCollectorTest, CheckSystemCrash);
     79   FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoCurrent);
     80   FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoLast);
     81   FRIEND_TEST(MetricsCollectorTest, GetHistogramPath);
     82   FRIEND_TEST(MetricsCollectorTest, IsNewEpoch);
     83   FRIEND_TEST(MetricsCollectorTest, MessageFilter);
     84   FRIEND_TEST(MetricsCollectorTest, ProcessKernelCrash);
     85   FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo);
     86   FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo2);
     87   FRIEND_TEST(MetricsCollectorTest, ProcessUncleanShutdown);
     88   FRIEND_TEST(MetricsCollectorTest, ProcessUserCrash);
     89   FRIEND_TEST(MetricsCollectorTest, ReportCrashesDailyFrequency);
     90   FRIEND_TEST(MetricsCollectorTest, ReportKernelCrashInterval);
     91   FRIEND_TEST(MetricsCollectorTest, ReportUncleanShutdownInterval);
     92   FRIEND_TEST(MetricsCollectorTest, ReportUserCrashInterval);
     93   FRIEND_TEST(MetricsCollectorTest, SendSample);
     94   FRIEND_TEST(MetricsCollectorTest, SendZramMetrics);
     95 
     96   // Type of scale to use for meminfo histograms.  For most of them we use
     97   // percent of total RAM, but for some we use absolute numbers, usually in
     98   // megabytes, on a log scale from 0 to 4000, and 0 to 8000 for compressed
     99   // swap (since it can be larger than total RAM).
    100   enum MeminfoOp {
    101     kMeminfoOp_HistPercent = 0,
    102     kMeminfoOp_HistLog,
    103     kMeminfoOp_SwapTotal,
    104     kMeminfoOp_SwapFree,
    105   };
    106 
    107   // Record for retrieving and reporting values from /proc/meminfo.
    108   struct MeminfoRecord {
    109     const char* name;        // print name
    110     const char* match;       // string to match in output of /proc/meminfo
    111     MeminfoOp op;            // histogram scale selector, or other operator
    112     int value;               // value from /proc/meminfo
    113   };
    114 
    115   // Enables metrics reporting.
    116   void OnEnableMetrics(std::unique_ptr<weaved::Command> command);
    117 
    118   // Disables metrics reporting.
    119   void OnDisableMetrics(std::unique_ptr<weaved::Command> command);
    120 
    121   // Updates the weave device state.
    122   void UpdateWeaveState();
    123 
    124   // Updates the active use time and logs time between kernel crashes.
    125   void ProcessKernelCrash();
    126 
    127   // Updates the active use time and logs time between unclean shutdowns.
    128   void ProcessUncleanShutdown();
    129 
    130   // Checks if a kernel crash has been detected and returns true if
    131   // so.  The method assumes that a kernel crash has happened if
    132   // |crash_file| exists.  It removes the file immediately if it
    133   // exists, so it must not be called more than once.
    134   bool CheckSystemCrash(const std::string& crash_file);
    135 
    136   // Sends a regular (exponential) histogram sample to Chrome for
    137   // transport to UMA. See MetricsLibrary::SendToUMA in
    138   // metrics_library.h for a description of the arguments.
    139   void SendSample(const std::string& name, int sample,
    140                   int min, int max, int nbuckets);
    141 
    142   // Sends a linear histogram sample to Chrome for transport to UMA. See
    143   // MetricsLibrary::SendToUMA in metrics_library.h for a description of the
    144   // arguments.
    145   void SendLinearSample(const std::string& name, int sample,
    146                         int max, int nbuckets);
    147 
    148   // Sends various cumulative kernel crash-related stats, for instance the
    149   // total number of kernel crashes since the last version update.
    150   void SendKernelCrashesCumulativeCountStats();
    151 
    152   // Sends a sample representing the number of seconds of active use
    153   // for a 24-hour period and reset |use|.
    154   void SendAndResetDailyUseSample(const unique_ptr<PersistentInteger>& use);
    155 
    156   // Sends a sample representing a time interval between two crashes of the
    157   // same type and reset |interval|.
    158   void SendAndResetCrashIntervalSample(
    159       const unique_ptr<PersistentInteger>& interval);
    160 
    161   // Sends a sample representing a frequency of crashes of some type and reset
    162   // |frequency|.
    163   void SendAndResetCrashFrequencySample(
    164       const unique_ptr<PersistentInteger>& frequency);
    165 
    166   // Initializes vm and disk stats reporting.
    167   void StatsReporterInit();
    168 
    169   // Schedules meminfo collection callback.
    170   void ScheduleMeminfoCallback(int wait);
    171 
    172   // Reports memory statistics.  Reschedules callback on success.
    173   void MeminfoCallback(base::TimeDelta wait);
    174 
    175   // Parses content of /proc/meminfo and sends fields of interest to UMA.
    176   // Returns false on errors.  |meminfo_raw| contains the content of
    177   // /proc/meminfo.
    178   bool ProcessMeminfo(const std::string& meminfo_raw);
    179 
    180   // Parses meminfo data from |meminfo_raw|.  |fields| is a vector containing
    181   // the fields of interest.  The order of the fields must be the same in which
    182   // /proc/meminfo prints them.  The result of parsing fields[i] is placed in
    183   // fields[i].value.
    184   bool FillMeminfo(const std::string& meminfo_raw,
    185                    std::vector<MeminfoRecord>* fields);
    186 
    187   // Schedule a memory use callback in |interval| seconds.
    188   void ScheduleMemuseCallback(double interval);
    189 
    190   // Calls MemuseCallbackWork, and possibly schedules next callback, if enough
    191   // active time has passed.  Otherwise reschedules itself to simulate active
    192   // time callbacks (i.e. wall clock time minus sleep time).
    193   void MemuseCallback();
    194 
    195   // Reads /proc/meminfo and sends total anonymous memory usage to UMA.
    196   bool MemuseCallbackWork();
    197 
    198   // Parses meminfo data and sends it to UMA.
    199   bool ProcessMemuse(const std::string& meminfo_raw);
    200 
    201   // Reads the current OS version from /etc/lsb-release and hashes it
    202   // to a unsigned 32-bit int.
    203   uint32_t GetOsVersionHash();
    204 
    205   // Updates stats, additionally sending them to UMA if enough time has elapsed
    206   // since the last report.
    207   void UpdateStats(base::TimeTicks now_ticks, base::Time now_wall_time);
    208 
    209   // Invoked periodically by |update_stats_timeout_id_| to call UpdateStats().
    210   void HandleUpdateStatsTimeout();
    211 
    212   // Reports zram statistics.
    213   bool ReportZram(const base::FilePath& zram_dir);
    214 
    215   // Reads a string from a file and converts it to uint64_t.
    216   static bool ReadFileToUint64(const base::FilePath& path, uint64_t* value);
    217 
    218   // Callback invoked when a connection to weaved's service is established
    219   // over Binder interface.
    220   void OnWeaveServiceConnected(const std::weak_ptr<weaved::Service>& service);
    221 
    222   // VARIABLES
    223 
    224   // Test mode.
    225   bool testing_;
    226 
    227   // Publicly readable metrics directory.
    228   base::FilePath shared_metrics_directory_;
    229 
    230   // The metrics library handle.
    231   MetricsLibraryInterface* metrics_lib_;
    232 
    233   // The last time that UpdateStats() was called.
    234   base::TimeTicks last_update_stats_time_;
    235 
    236   // End time of current memuse stat collection interval.
    237   double memuse_final_time_;
    238 
    239   // Selects the wait time for the next memory use callback.
    240   unsigned int memuse_interval_index_;
    241 
    242   // Used internally by GetIncrementalCpuUse() to return the CPU utilization
    243   // between calls.
    244   base::TimeDelta latest_cpu_use_microseconds_;
    245 
    246   // Persistent values and accumulators for crash statistics.
    247   unique_ptr<PersistentInteger> daily_cycle_;
    248   unique_ptr<PersistentInteger> weekly_cycle_;
    249   unique_ptr<PersistentInteger> version_cycle_;
    250 
    251   // Active use accumulated in a day.
    252   unique_ptr<PersistentInteger> daily_active_use_;
    253   // Active use accumulated since the latest version update.
    254   unique_ptr<PersistentInteger> version_cumulative_active_use_;
    255 
    256   // The CPU time accumulator.  This contains the CPU time, in milliseconds,
    257   // used by the system since the most recent OS version update.
    258   unique_ptr<PersistentInteger> version_cumulative_cpu_use_;
    259 
    260   unique_ptr<PersistentInteger> user_crash_interval_;
    261   unique_ptr<PersistentInteger> kernel_crash_interval_;
    262   unique_ptr<PersistentInteger> unclean_shutdown_interval_;
    263 
    264   unique_ptr<PersistentInteger> any_crashes_daily_count_;
    265   unique_ptr<PersistentInteger> any_crashes_weekly_count_;
    266   unique_ptr<PersistentInteger> user_crashes_daily_count_;
    267   unique_ptr<PersistentInteger> user_crashes_weekly_count_;
    268   unique_ptr<PersistentInteger> kernel_crashes_daily_count_;
    269   unique_ptr<PersistentInteger> kernel_crashes_weekly_count_;
    270   unique_ptr<PersistentInteger> kernel_crashes_version_count_;
    271   unique_ptr<PersistentInteger> unclean_shutdowns_daily_count_;
    272   unique_ptr<PersistentInteger> unclean_shutdowns_weekly_count_;
    273 
    274   unique_ptr<CpuUsageCollector> cpu_usage_collector_;
    275   unique_ptr<DiskUsageCollector> disk_usage_collector_;
    276   unique_ptr<AveragedStatisticsCollector> averaged_stats_collector_;
    277 
    278   unique_ptr<weaved::Service::Subscription> weave_service_subscription_;
    279   std::weak_ptr<weaved::Service> service_;
    280 
    281   base::WeakPtrFactory<MetricsCollector> weak_ptr_factory_{this};
    282 };
    283 
    284 #endif  // METRICS_METRICS_COLLECTOR_H_
    285