Home | History | Annotate | Download | only in metrics
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef METRIC_PRODUCER_H
     18 #define METRIC_PRODUCER_H
     19 
     20 #include <shared_mutex>
     21 
     22 #include <frameworks/base/cmds/statsd/src/active_config_list.pb.h>
     23 #include "HashableDimensionKey.h"
     24 #include "anomaly/AnomalyTracker.h"
     25 #include "condition/ConditionWizard.h"
     26 #include "config/ConfigKey.h"
     27 #include "matchers/matcher_util.h"
     28 #include "packages/PackageInfoListener.h"
     29 
     30 #include <log/logprint.h>
     31 #include <utils/RefBase.h>
     32 #include <unordered_map>
     33 
     34 namespace android {
     35 namespace os {
     36 namespace statsd {
     37 
     38 // Keep this in sync with DumpReportReason enum in stats_log.proto
     39 enum DumpReportReason {
     40     DEVICE_SHUTDOWN = 1,
     41     CONFIG_UPDATED = 2,
     42     CONFIG_REMOVED = 3,
     43     GET_DATA_CALLED = 4,
     44     ADB_DUMP = 5,
     45     CONFIG_RESET = 6,
     46     STATSCOMPANION_DIED = 7,
     47     TERMINATION_SIGNAL_RECEIVED = 8
     48 };
     49 
// If the metric has no activation requirement, it will be active once the metric producer is
// created.
// If the metric needs to be activated by atoms, the metric producer will start in the
// kNotActive state, turn to kActive or kActiveOnBoot when the activation event arrives, and
// become kNotActive again when it reaches the duration limit (timebomb). If the activation
// event arrives again, whether before or after it expires, the metric producer will be
// re-activated and the ttl will be reset.
     56 enum ActivationState {
     57     kNotActive = 0,
     58     kActive = 1,
     59     kActiveOnBoot = 2,
     60 };
     61 
     62 enum DumpLatency {
     63     // In some cases, we only have a short time range to do the dump, e.g. statsd is being killed.
     64     // We might be able to return all the data in this mode. For instance, pull metrics might need
     65     // to be pulled when the current bucket is requested.
     66     FAST = 1,
     67     // In other cases, it is fine for a dump to take more than a few milliseconds, e.g. config
     68     // updates.
     69     NO_TIME_CONSTRAINTS = 2
     70 };
     71 
// A MetricProducer is responsible for computing a single metric, creating the stats log report,
// and writing the report to dropbox. MetricProducers should respond to package changes as
// required in PackageInfoListener, but if none of the metrics are slicing by package name, then
// the update can be a no-op.
     76 class MetricProducer : public virtual PackageInfoListener {
     77 public:
     78     MetricProducer(const int64_t& metricId, const ConfigKey& key, const int64_t timeBaseNs,
     79                    const int conditionIndex, const sp<ConditionWizard>& wizard)
     80         : mMetricId(metricId),
     81           mConfigKey(key),
     82           mTimeBaseNs(timeBaseNs),
     83           mCurrentBucketStartTimeNs(timeBaseNs),
     84           mCurrentBucketNum(0),
     85           mCondition(initialCondition(conditionIndex)),
     86           mConditionSliced(false),
     87           mWizard(wizard),
     88           mConditionTrackerIndex(conditionIndex),
     89           mContainANYPositionInDimensionsInWhat(false),
     90           mSliceByPositionALL(false),
     91           mSameConditionDimensionsInTracker(false),
     92           mHasLinksToAllConditionDimensionsInTracker(false),
     93           mIsActive(true) {
     94     }
     95 
     96     virtual ~MetricProducer(){};
     97 
     98     ConditionState initialCondition(const int conditionIndex) const {
     99         return conditionIndex >= 0 ? ConditionState::kUnknown : ConditionState::kTrue;
    100     }
    101 
    102     /**
    103      * Forces this metric to split into a partial bucket right now. If we're past a full bucket, we
    104      * first call the standard flushing code to flush up to the latest full bucket. Then we call
    105      * the flush again when the end timestamp is forced to be now, and then after flushing, update
    106      * the start timestamp to be now.
    107      */
    108     void notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid,
    109                           const int64_t version) override {
    110         std::lock_guard<std::mutex> lock(mMutex);
    111 
    112         if (eventTimeNs > getCurrentBucketEndTimeNs()) {
    113             // Flush full buckets on the normal path up to the latest bucket boundary.
    114             flushIfNeededLocked(eventTimeNs);
    115         }
    116         // Now flush a partial bucket.
    117         flushCurrentBucketLocked(eventTimeNs, eventTimeNs);
    118         // Don't update the current bucket number so that the anomaly tracker knows this bucket
    119         // is a partial bucket and can merge it with the previous bucket.
    120     };
    121 
    122     void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid) override{
    123         // Force buckets to split on removal also.
    124         notifyAppUpgrade(eventTimeNs, apk, uid, 0);
    125     };
    126 
    127     void onUidMapReceived(const int64_t& eventTimeNs) override{
    128             // Purposefully don't flush partial buckets on a new snapshot.
    129             // This occurs if a new user is added/removed or statsd crashes.
    130     };
    131 
    132     // Consume the parsed stats log entry that already matched the "what" of the metric.
    133     void onMatchedLogEvent(const size_t matcherIndex, const LogEvent& event) {
    134         std::lock_guard<std::mutex> lock(mMutex);
    135         onMatchedLogEventLocked(matcherIndex, event);
    136     }
    137 
    138     void onConditionChanged(const bool condition, const int64_t eventTime) {
    139         std::lock_guard<std::mutex> lock(mMutex);
    140         onConditionChangedLocked(condition, eventTime);
    141     }
    142 
    143     void onSlicedConditionMayChange(bool overallCondition, const int64_t eventTime) {
    144         std::lock_guard<std::mutex> lock(mMutex);
    145         onSlicedConditionMayChangeLocked(overallCondition, eventTime);
    146     }
    147 
    148     bool isConditionSliced() const {
    149         std::lock_guard<std::mutex> lock(mMutex);
    150         return mConditionSliced;
    151     };
    152 
    153     // Output the metrics data to [protoOutput]. All metrics reports end with the same timestamp.
    154     // This method clears all the past buckets.
    155     void onDumpReport(const int64_t dumpTimeNs,
    156                       const bool include_current_partial_bucket,
    157                       const bool erase_data,
    158                       const DumpLatency dumpLatency,
    159                       std::set<string> *str_set,
    160                       android::util::ProtoOutputStream* protoOutput) {
    161         std::lock_guard<std::mutex> lock(mMutex);
    162         return onDumpReportLocked(dumpTimeNs, include_current_partial_bucket, erase_data,
    163                 dumpLatency, str_set, protoOutput);
    164     }
    165 
    166     void clearPastBuckets(const int64_t dumpTimeNs) {
    167         std::lock_guard<std::mutex> lock(mMutex);
    168         return clearPastBucketsLocked(dumpTimeNs);
    169     }
    170 
    171     void dumpStates(FILE* out, bool verbose) const {
    172         std::lock_guard<std::mutex> lock(mMutex);
    173         dumpStatesLocked(out, verbose);
    174     }
    175 
    176     // Returns the memory in bytes currently used to store this metric's data. Does not change
    177     // state.
    178     size_t byteSize() const {
    179         std::lock_guard<std::mutex> lock(mMutex);
    180         return byteSizeLocked();
    181     }
    182 
    183     /* If alert is valid, adds an AnomalyTracker and returns it. If invalid, returns nullptr. */
    184     virtual sp<AnomalyTracker> addAnomalyTracker(const Alert &alert,
    185                                                  const sp<AlarmMonitor>& anomalyAlarmMonitor) {
    186         std::lock_guard<std::mutex> lock(mMutex);
    187         sp<AnomalyTracker> anomalyTracker = new AnomalyTracker(alert, mConfigKey);
    188         if (anomalyTracker != nullptr) {
    189             mAnomalyTrackers.push_back(anomalyTracker);
    190         }
    191         return anomalyTracker;
    192     }
    193 
    194     int64_t getBuckeSizeInNs() const {
    195         std::lock_guard<std::mutex> lock(mMutex);
    196         return mBucketSizeNs;
    197     }
    198 
    199     // Only needed for unit-testing to override guardrail.
    200     void setBucketSize(int64_t bucketSize) {
    201         mBucketSizeNs = bucketSize;
    202     }
    203 
    204     inline const int64_t& getMetricId() const {
    205         return mMetricId;
    206     }
    207 
    208     void loadActiveMetric(const ActiveMetric& activeMetric, int64_t currentTimeNs) {
    209         std::lock_guard<std::mutex> lock(mMutex);
    210         loadActiveMetricLocked(activeMetric, currentTimeNs);
    211     }
    212 
    213     // Let MetricProducer drop in-memory data to save memory.
    214     // We still need to keep future data valid and anomaly tracking work, which means we will
    215     // have to flush old data, informing anomaly trackers then safely drop old data.
    216     // We still keep current bucket data for future metrics' validity.
    217     void dropData(const int64_t dropTimeNs) {
    218         std::lock_guard<std::mutex> lock(mMutex);
    219         dropDataLocked(dropTimeNs);
    220     }
    221 
    222     // For test only.
    223     inline int64_t getCurrentBucketNum() const {
    224         return mCurrentBucketNum;
    225     }
    226 
    227     void activate(int activationTrackerIndex, int64_t elapsedTimestampNs) {
    228         std::lock_guard<std::mutex> lock(mMutex);
    229         activateLocked(activationTrackerIndex, elapsedTimestampNs);
    230     }
    231 
    232     void cancelEventActivation(int deactivationTrackerIndex) {
    233         std::lock_guard<std::mutex> lock(mMutex);
    234         cancelEventActivationLocked(deactivationTrackerIndex);
    235     }
    236 
    237     bool isActive() const {
    238         std::lock_guard<std::mutex> lock(mMutex);
    239         return isActiveLocked();
    240     }
    241 
    242     void addActivation(int activationTrackerIndex, const ActivationType& activationType,
    243             int64_t ttl_seconds, int deactivationTrackerIndex = -1);
    244 
    245     void prepareFirstBucket() {
    246         std::lock_guard<std::mutex> lock(mMutex);
    247         prepareFirstBucketLocked();
    248     }
    249 
    250     void flushIfExpire(int64_t elapsedTimestampNs);
    251 
    252     void writeActiveMetricToProtoOutputStream(
    253             int64_t currentTimeNs, const DumpReportReason reason, ProtoOutputStream* proto);
    254 protected:
    255     virtual void onConditionChangedLocked(const bool condition, const int64_t eventTime) = 0;
    256     virtual void onSlicedConditionMayChangeLocked(bool overallCondition,
    257                                                   const int64_t eventTime) = 0;
    258     virtual void onDumpReportLocked(const int64_t dumpTimeNs,
    259                                     const bool include_current_partial_bucket,
    260                                     const bool erase_data,
    261                                     const DumpLatency dumpLatency,
    262                                     std::set<string> *str_set,
    263                                     android::util::ProtoOutputStream* protoOutput) = 0;
    264     virtual void clearPastBucketsLocked(const int64_t dumpTimeNs) = 0;
    265     virtual size_t byteSizeLocked() const = 0;
    266     virtual void dumpStatesLocked(FILE* out, bool verbose) const = 0;
    267 
    268     bool evaluateActiveStateLocked(int64_t elapsedTimestampNs);
    269 
    270     void activateLocked(int activationTrackerIndex, int64_t elapsedTimestampNs);
    271     void cancelEventActivationLocked(int deactivationTrackerIndex);
    272 
    273     inline bool isActiveLocked() const {
    274         return mIsActive;
    275     }
    276 
    277     void loadActiveMetricLocked(const ActiveMetric& activeMetric, int64_t currentTimeNs);
    278 
    279     virtual void prepareFirstBucketLocked() {};
    280     /**
    281      * Flushes the current bucket if the eventTime is after the current bucket's end time. This will
    282        also flush the current partial bucket in memory.
    283      */
    284     virtual void flushIfNeededLocked(const int64_t& eventTime){};
    285 
    286     /**
    287      * Flushes all the data including the current partial bucket.
    288      */
    289     virtual void flushLocked(const int64_t& eventTimeNs) {
    290         flushIfNeededLocked(eventTimeNs);
    291         flushCurrentBucketLocked(eventTimeNs, eventTimeNs);
    292     };
    293 
    294     /**
    295      * For metrics that aggregate (ie, every metric producer except for EventMetricProducer),
    296      * we need to be able to flush the current buckets on demand (ie, end the current bucket and
    297      * start new bucket). If this function is called when eventTimeNs is greater than the current
    298      * bucket's end timestamp, than we flush up to the end of the latest full bucket; otherwise,
    299      * we assume that we want to flush a partial bucket. The bucket start timestamp and bucket
    300      * number are not changed by this function. This method should only be called by
    301      * flushIfNeededLocked or flushLocked or the app upgrade handler; the caller MUST update the
    302      * bucket timestamp and bucket number as needed.
    303      */
    304     virtual void flushCurrentBucketLocked(const int64_t& eventTimeNs,
    305                                           const int64_t& nextBucketStartTimeNs) {};
    306 
    307     virtual void onActiveStateChangedLocked(const int64_t& eventTimeNs) {
    308         if (!mIsActive) {
    309             flushLocked(eventTimeNs);
    310         }
    311     }
    312 
    313     // Convenience to compute the current bucket's end time, which is always aligned with the
    314     // start time of the metric.
    315     int64_t getCurrentBucketEndTimeNs() const {
    316         return mTimeBaseNs + (mCurrentBucketNum + 1) * mBucketSizeNs;
    317     }
    318 
    319     int64_t getBucketNumFromEndTimeNs(const int64_t endNs) {
    320         return (endNs - mTimeBaseNs) / mBucketSizeNs - 1;
    321     }
    322 
    323     virtual void dropDataLocked(const int64_t dropTimeNs) = 0;
    324 
    325     const int64_t mMetricId;
    326 
    327     const ConfigKey mConfigKey;
    328 
    329     // The time when this metric producer was first created. The end time for the current bucket
    330     // can be computed from this based on mCurrentBucketNum.
    331     int64_t mTimeBaseNs;
    332 
    333     // Start time may not be aligned with the start of statsd if there is an app upgrade in the
    334     // middle of a bucket.
    335     int64_t mCurrentBucketStartTimeNs;
    336 
    337     // Used by anomaly detector to track which bucket we are in. This is not sent with the produced
    338     // report.
    339     int64_t mCurrentBucketNum;
    340 
    341     int64_t mBucketSizeNs;
    342 
    343     ConditionState mCondition;
    344 
    345     bool mConditionSliced;
    346 
    347     sp<ConditionWizard> mWizard;
    348 
    349     int mConditionTrackerIndex;
    350 
    351     vector<Matcher> mDimensionsInWhat;       // The dimensions_in_what defined in statsd_config
    352     vector<Matcher> mDimensionsInCondition;  // The dimensions_in_condition defined in statsd_config
    353 
    354     bool mContainANYPositionInDimensionsInWhat;
    355     bool mSliceByPositionALL;
    356 
    357     // True iff the condition dimensions equal to the sliced dimensions in the simple condition
    358     // tracker. This field is always false for combinational condition trackers.
    359     bool mSameConditionDimensionsInTracker;
    360 
    361     // True iff the metric to condition links cover all dimension fields in the condition tracker.
    362     // This field is always false for combinational condition trackers.
    363     bool mHasLinksToAllConditionDimensionsInTracker;
    364 
    365     std::vector<Metric2Condition> mMetric2ConditionLinks;
    366 
    367     std::vector<sp<AnomalyTracker>> mAnomalyTrackers;
    368 
    369     /*
    370      * Individual metrics can implement their own business logic here. All pre-processing is done.
    371      *
    372      * [matcherIndex]: the index of the matcher which matched this event. This is interesting to
    373      *                 DurationMetric, because it has start/stop/stop_all 3 matchers.
    374      * [eventKey]: the extracted dimension key for the final output. if the metric doesn't have
    375      *             dimensions, it will be DEFAULT_DIMENSION_KEY
    376      * [conditionKey]: the keys of conditions which should be used to query the condition for this
    377      *                 target event (from MetricConditionLink). This is passed to individual metrics
    378      *                 because DurationMetric needs it to be cached.
    379      * [condition]: whether condition is met. If condition is sliced, this is the result coming from
    380      *              query with ConditionWizard; If condition is not sliced, this is the
    381      *              nonSlicedCondition.
    382      * [event]: the log event, just in case the metric needs its data, e.g., EventMetric.
    383      */
    384     virtual void onMatchedLogEventInternalLocked(
    385             const size_t matcherIndex, const MetricDimensionKey& eventKey,
    386             const ConditionKey& conditionKey, bool condition,
    387             const LogEvent& event) = 0;
    388 
    389     // Consume the parsed stats log entry that already matched the "what" of the metric.
    390     virtual void onMatchedLogEventLocked(const size_t matcherIndex, const LogEvent& event);
    391 
    392     mutable std::mutex mMutex;
    393 
    394     struct Activation {
    395         Activation(const ActivationType& activationType, const int64_t ttlNs)
    396             : ttl_ns(ttlNs),
    397               start_ns(0),
    398               state(ActivationState::kNotActive),
    399               activationType(activationType) {}
    400 
    401         const int64_t ttl_ns;
    402         int64_t start_ns;
    403         ActivationState state;
    404         const ActivationType activationType;
    405     };
    406     // When the metric producer has multiple activations, these activations are ORed to determine
    407     // whether the metric producer is ready to generate metrics.
    408     std::unordered_map<int, std::shared_ptr<Activation>> mEventActivationMap;
    409 
    410     // Maps index of atom matcher for deactivation to a list of Activation structs.
    411     std::unordered_map<int, std::vector<std::shared_ptr<Activation>>> mEventDeactivationMap;
    412 
    413     bool mIsActive;
    414 
    415     FRIEND_TEST(DurationMetricE2eTest, TestOneBucket);
    416     FRIEND_TEST(DurationMetricE2eTest, TestTwoBuckets);
    417     FRIEND_TEST(DurationMetricE2eTest, TestWithActivation);
    418     FRIEND_TEST(DurationMetricE2eTest, TestWithCondition);
    419     FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedCondition);
    420     FRIEND_TEST(DurationMetricE2eTest, TestWithActivationAndSlicedCondition);
    421 
    422     FRIEND_TEST(MetricActivationE2eTest, TestCountMetric);
    423     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithOneDeactivation);
    424     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoDeactivations);
    425     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithSameDeactivation);
    426     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoMetricsTwoDeactivations);
    427 
    428     FRIEND_TEST(StatsLogProcessorTest, TestActiveConfigMetricDiskWriteRead);
    429     FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBoot);
    430     FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBootMultipleActivations);
    431     FRIEND_TEST(StatsLogProcessorTest,
    432             TestActivationOnBootMultipleActivationsDifferentActivationTypes);
    433     FRIEND_TEST(StatsLogProcessorTest, TestActivationsPersistAcrossSystemServerRestart);
    434 };
    435 
    436 }  // namespace statsd
    437 }  // namespace os
    438 }  // namespace android
    439 #endif  // METRIC_PRODUCER_H
    440