Home | History | Annotate | Download | only in guardrail
      1 /*
      2  * Copyright 2017, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 #pragma once
     17 
#include "config/ConfigKey.h"
#include "statslog.h"

#include <gtest/gtest_prod.h>
#include <log/log_time.h>
#include <cstdint>
#include <cstdio>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
     27 
     28 namespace android {
     29 namespace os {
     30 namespace statsd {
     31 
     32 struct ConfigStats {
     33     int32_t uid;
     34     int64_t id;
     35     int32_t creation_time_sec;
     36     int32_t deletion_time_sec = 0;
     37     int32_t reset_time_sec = 0;
     38     int32_t metric_count;
     39     int32_t condition_count;
     40     int32_t matcher_count;
     41     int32_t alert_count;
     42     bool is_valid;
     43 
     44     std::list<int32_t> broadcast_sent_time_sec;
     45     std::list<int32_t> data_drop_time_sec;
     46     std::list<std::pair<int32_t, int64_t>> dump_report_stats;
     47 
     48     // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount.
     49     std::map<const int64_t, int> matcher_stats;
     50 
     51     // Stores the number of output tuple of condition trackers when it's bigger than
     52     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
     53     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
     54     std::map<const int64_t, int> condition_stats;
     55 
     56     // Stores the number of output tuple of metric producers when it's bigger than
     57     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
     58     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
     59     std::map<const int64_t, int> metric_stats;
     60 
     61     // Stores the max number of output tuple of dimensions in condition across dimensions in what
     62     // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is
     63     // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by
     64     // kMaxConfigCount.
     65     std::map<const int64_t, int> metric_dimension_in_condition_stats;
     66 
     67     // Stores the number of times an anomaly detection alert has been declared.
     68     // The map size is capped by kMaxConfigCount.
     69     std::map<const int64_t, int> alert_stats;
     70 
     71     // Stores the config ID for each sub-config used.
     72     std::list<std::pair<const int64_t, const int32_t>> annotations;
     73 };
     74 
     75 struct UidMapStats {
     76     int32_t changes;
     77     int32_t bytes_used;
     78     int32_t dropped_changes;
     79     int32_t deleted_apps = 0;
     80 };
     81 
     82 // Keeps track of stats of statsd.
     83 // Single instance shared across the process. All public methods are thread safe.
     84 class StatsdStats {
     85 public:
     86     static StatsdStats& getInstance();
     87     ~StatsdStats(){};
     88 
     89     // TODO: set different limit if the device is low ram.
     90     const static int kDimensionKeySizeSoftLimit = 500;
     91     const static int kDimensionKeySizeHardLimit = 800;
     92 
     93     // Per atom dimension key size limit
     94     static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap;
     95 
     96     const static int kMaxConfigCountPerUid = 10;
     97     const static int kMaxAlertCountPerConfig = 100;
     98     const static int kMaxConditionCountPerConfig = 300;
     99     const static int kMaxMetricCountPerConfig = 1000;
    100     const static int kMaxMatcherCountPerConfig = 800;
    101 
    102     // The max number of old config stats we keep.
    103     const static int kMaxIceBoxSize = 20;
    104 
    105     const static int kMaxLoggerErrors = 20;
    106 
    107     const static int kMaxSystemServerRestarts = 20;
    108 
    109     const static int kMaxTimestampCount = 20;
    110 
    111     const static int kMaxLogSourceCount = 50;
    112 
    113     // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd
    114     // drops the metrics data in memory.
    115     static const size_t kMaxMetricsBytesPerConfig = 256 * 1024;
    116 
    117     // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the
    118     // data subscriber that it's time to call getData.
    119     static const size_t kBytesPerConfigTriggerGetData = 192 * 1024;
    120 
    121     // Cap the UID map's memory usage to this. This should be fairly high since the UID information
    122     // is critical for understanding the metrics.
    123     const static size_t kMaxBytesUsedUidMap = 50 * 1024;
    124 
    125     // The number of deleted apps that are stored in the uid map.
    126     const static int kMaxDeletedAppsInUidMap = 100;
    127 
    128     /* Minimum period between two broadcasts in nanoseconds. */
    129     static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC;
    130 
    131     /* Min period between two checks of byte size per config key in nanoseconds. */
    132     static const int64_t kMinByteSizeCheckPeriodNs = 10 * NS_PER_SEC;
    133 
    134     // Maximum age (30 days) that files on disk can exist in seconds.
    135     static const int kMaxAgeSecond = 60 * 60 * 24 * 30;
    136 
    137     // Maximum number of files (1000) that can be in stats directory on disk.
    138     static const int kMaxFileNumber = 1000;
    139 
    140     // Maximum size of all files that can be written to stats directory on disk.
    141     static const int kMaxFileSize = 50 * 1024 * 1024;
    142 
    143     // How long to try to clear puller cache from last time
    144     static const long kPullerCacheClearIntervalSec = 1;
    145 
    146     /**
    147      * Report a new config has been received and report the static stats about the config.
    148      *
    149      * The static stats include: the count of metrics, conditions, matchers, and alerts.
    150      * If the config is not valid, this config stats will be put into icebox immediately.
    151      */
    152     void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount,
    153                             int matchersCount, int alertCount,
    154                             const std::list<std::pair<const int64_t, const int32_t>>& annotations,
    155                             bool isValid);
    156     /**
    157      * Report a config has been removed.
    158      */
    159     void noteConfigRemoved(const ConfigKey& key);
    160    /**
    161      * Report a config has been reset when ttl expires.
    162      */
    163     void noteConfigReset(const ConfigKey& key);
    164 
    165     /**
    166      * Report a broadcast has been sent to a config owner to collect the data.
    167      */
    168     void noteBroadcastSent(const ConfigKey& key);
    169 
    170     /**
    171      * Report a config's metrics data has been dropped.
    172      */
    173     void noteDataDropped(const ConfigKey& key);
    174 
    175     /**
    176      * Report metrics data report has been sent.
    177      *
    178      * The report may be requested via StatsManager API, or through adb cmd.
    179      */
    180     void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes);
    181 
    182     /**
    183      * Report the size of output tuple of a condition.
    184      *
    185      * Note: only report when the condition has an output dimension, and the tuple
    186      * count > kDimensionKeySizeSoftLimit.
    187      *
    188      * [key]: The config key that this condition belongs to.
    189      * [id]: The id of the condition.
    190      * [size]: The output tuple size.
    191      */
    192     void noteConditionDimensionSize(const ConfigKey& key, const int64_t& id, int size);
    193 
    194     /**
    195      * Report the size of output tuple of a metric.
    196      *
    197      * Note: only report when the metric has an output dimension, and the tuple
    198      * count > kDimensionKeySizeSoftLimit.
    199      *
    200      * [key]: The config key that this metric belongs to.
    201      * [id]: The id of the metric.
    202      * [size]: The output tuple size.
    203      */
    204     void noteMetricDimensionSize(const ConfigKey& key, const int64_t& id, int size);
    205 
    206 
    207     /**
    208      * Report the max size of output tuple of dimension in condition across dimensions in what.
    209      *
    210      * Note: only report when the metric has an output dimension in condition, and the max tuple
    211      * count > kDimensionKeySizeSoftLimit.
    212      *
    213      * [key]: The config key that this metric belongs to.
    214      * [id]: The id of the metric.
    215      * [size]: The output tuple size.
    216      */
    217     void noteMetricDimensionInConditionSize(const ConfigKey& key, const int64_t& id, int size);
    218 
    219     /**
    220      * Report a matcher has been matched.
    221      *
    222      * [key]: The config key that this matcher belongs to.
    223      * [id]: The id of the matcher.
    224      */
    225     void noteMatcherMatched(const ConfigKey& key, const int64_t& id);
    226 
    227     /**
    228      * Report that an anomaly detection alert has been declared.
    229      *
    230      * [key]: The config key that this alert belongs to.
    231      * [id]: The id of the alert.
    232      */
    233     void noteAnomalyDeclared(const ConfigKey& key, const int64_t& id);
    234 
    235     /**
    236      * Report an atom event has been logged.
    237      */
    238     void noteAtomLogged(int atomId, int32_t timeSec);
    239 
    240     /**
    241      * Report that statsd modified the anomaly alarm registered with StatsCompanionService.
    242      */
    243     void noteRegisteredAnomalyAlarmChanged();
    244 
    245     /**
    246      * Report that statsd modified the periodic alarm registered with StatsCompanionService.
    247      */
    248     void noteRegisteredPeriodicAlarmChanged();
    249 
    250     /**
    251      * Records the number of delta entries that are being dropped from the uid map.
    252      */
    253     void noteUidMapDropped(int deltas);
    254 
    255     /**
    256      * Records that an app was deleted (from statsd's map).
    257      */
    258     void noteUidMapAppDeletionDropped();
    259 
    260     /**
    261      * Updates the number of changes currently stored in the uid map.
    262      */
    263     void setUidMapChanges(int changes);
    264     void setCurrentUidMapMemory(int bytes);
    265 
    266     // Update minimum interval between pulls for an pulled atom
    267     void updateMinPullIntervalSec(int pullAtomId, long intervalSec);
    268 
    269     // Notify pull request for an atom
    270     void notePull(int pullAtomId);
    271 
    272     // Notify pull request for an atom served from cached data
    273     void notePullFromCache(int pullAtomId);
    274 
    275     /**
    276      * Records statsd met an error while reading from logd.
    277      */
    278     void noteLoggerError(int error);
    279 
    280     /*
    281     * Records when system server restarts.
    282     */
    283     void noteSystemServerRestart(int32_t timeSec);
    284 
    285     /**
    286      * Records statsd skipped an event.
    287      */
    288     void noteLogLost(int64_t timestamp);
    289 
    290     /**
    291      * Reset the historical stats. Including all stats in icebox, and the tracked stats about
    292      * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue
    293      * to collect stats after reset() has been called.
    294      */
    295     void reset();
    296 
    297     /**
    298      * Output the stats in protobuf binary format to [buffer].
    299      *
    300      * [reset]: whether to clear the historical stats after the call.
    301      */
    302     void dumpStats(std::vector<uint8_t>* buffer, bool reset);
    303 
    304     /**
    305      * Output statsd stats in human readable format to [out] file.
    306      */
    307     void dumpStats(FILE* out) const;
    308 
    309     typedef struct {
    310         long totalPull;
    311         long totalPullFromCache;
    312         long minPullIntervalSec;
    313     } PulledAtomStats;
    314 
    315 private:
    316     StatsdStats();
    317 
    318     mutable std::mutex mLock;
    319 
    320     int32_t mStartTimeSec;
    321 
    322     // Track the number of dropped entries used by the uid map.
    323     UidMapStats mUidMapStats;
    324 
    325     // The stats about the configs that are still in use.
    326     // The map size is capped by kMaxConfigCount.
    327     std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats;
    328 
    329     // Stores the stats for the configs that are no longer in use.
    330     // The size of the vector is capped by kMaxIceBoxSize.
    331     std::list<const std::shared_ptr<ConfigStats>> mIceBox;
    332 
    333     // Stores the number of times a pushed atom is logged.
    334     // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms
    335     // out of that range will be dropped (it's either pulled atoms or test atoms).
    336     // This is a vector, not a map because it will be accessed A LOT -- for each stats log.
    337     std::vector<int> mPushedAtomStats;
    338 
    339     // Maps PullAtomId to its stats. The size is capped by the puller atom counts.
    340     std::map<int, PulledAtomStats> mPulledAtomStats;
    341 
    342     // Logd errors. Size capped by kMaxLoggerErrors.
    343     std::list<const std::pair<int, int>> mLoggerErrors;
    344 
    345     // Timestamps when we detect log loss after logd reconnect.
    346     std::list<int64_t> mLogLossTimestampNs;
    347 
    348     std::list<int32_t> mSystemServerRestartSec;
    349 
    350     // Stores the number of times statsd modified the anomaly alarm registered with
    351     // StatsCompanionService.
    352     int mAnomalyAlarmRegisteredStats = 0;
    353 
    354     // Stores the number of times statsd registers the periodic alarm changes
    355     int mPeriodicAlarmRegisteredStats = 0;
    356 
    357     void noteConfigResetInternalLocked(const ConfigKey& key);
    358 
    359     void noteConfigRemovedInternalLocked(const ConfigKey& key);
    360 
    361     void resetInternalLocked();
    362 
    363     void noteDataDropped(const ConfigKey& key, int32_t timeSec);
    364 
    365     void noteMetricsReportSent(const ConfigKey& key, const size_t num_bytes, int32_t timeSec);
    366 
    367     void noteBroadcastSent(const ConfigKey& key, int32_t timeSec);
    368 
    369     void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats);
    370 
    371     FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd);
    372     FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd);
    373     FRIEND_TEST(StatsdStatsTest, TestConfigRemove);
    374     FRIEND_TEST(StatsdStatsTest, TestSubStats);
    375     FRIEND_TEST(StatsdStatsTest, TestAtomLog);
    376     FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold);
    377     FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor);
    378     FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash);
    379 };
    380 
    381 }  // namespace statsd
    382 }  // namespace os
    383 }  // namespace android
    384