Home | History | Annotate | Download | only in metrics
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/common/metrics/entropy_provider.h"
      6 
      7 #include <cmath>
      8 #include <limits>
      9 #include <numeric>
     10 
     11 #include "base/basictypes.h"
     12 #include "base/guid.h"
     13 #include "base/memory/scoped_ptr.h"
     14 #include "base/rand_util.h"
     15 #include "base/strings/string_number_conversions.h"
     16 #include "chrome/common/metrics/metrics_util.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 namespace metrics {
     20 
     21 namespace {
     22 
     // Number of distinct low entropy source values handed to the permuted
     // entropy provider by the tests in this file.
     const size_t kMaxLowEntropySize = 8000;

     // Names of the field trials exercised by the unit tests.
     const char* const kTestTrialNames[] = {
         "TestTrial",
         "AnotherTestTrial",
         "NewTabButton"
     };
     30 
     // Returns the Chi-Square statistic of |values| measured against a uniform
     // distribution in which every entry is expected to equal |expected_value|.
     //
     // The statistic is Sum((O - E)^2 / E), with O the observed value of an entry
     // and E its expected value. An empty |values| yields 0.
     double ComputeChiSquare(const std::vector<int>& values,
                             double expected_value) {
       double chi_square = 0;
       for (std::vector<int>::const_iterator observed = values.begin();
            observed != values.end(); ++observed) {
         const double deviation = *observed - expected_value;
         chi_square += (deviation * deviation) / expected_value;
       }
       return chi_square;
     }
     45 
     46 // Computes SHA1-based entropy for the given |trial_name| based on
     47 // |entropy_source|
     48 double GenerateSHA1Entropy(const std::string& entropy_source,
     49                            const std::string& trial_name) {
     50   SHA1EntropyProvider sha1_provider(entropy_source);
     51   return sha1_provider.GetEntropyForTrial(trial_name, 0);
     52 }
     53 
     54 // Generates permutation-based entropy for the given |trial_name| based on
     55 // |entropy_source| which must be in the range [0, entropy_max).
     56 double GeneratePermutedEntropy(uint16 entropy_source,
     57                                size_t entropy_max,
     58                                const std::string& trial_name) {
     59   PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
     60   return permuted_provider.GetEntropyForTrial(trial_name, 0);
     61 }
     62 
     // Test-only interface that produces entropy values for one field trial.
     //
     // An EntropyProvider keeps its low/high entropy source value fixed and is
     // asked for entropy for different trial names. A TrialEntropyGenerator does
     // the reverse: the trial name stays fixed and each call produces its own
     // low/high entropy source value internally to compute the returned entropy.
     class TrialEntropyGenerator {
      public:
       // Returns the next entropy value for the trial this generator represents.
       virtual double GenerateEntropyValue() const = 0;

       virtual ~TrialEntropyGenerator() {}
     };
     73 
     74 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
     75 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of
     76 // entropy corresponding to a low entropy source).
     77 class SHA1EntropyGenerator : public TrialEntropyGenerator {
     78  public:
     79   explicit SHA1EntropyGenerator(const std::string& trial_name)
     80       : trial_name_(trial_name) {
     81   }
     82 
     83   virtual ~SHA1EntropyGenerator() {
     84   }
     85 
     86   virtual double GenerateEntropyValue() const OVERRIDE {
     87     // Use a random GUID + 13 additional bits of entropy to match how the
     88     // SHA1EntropyProvider is used in metrics_service.cc.
     89     const int low_entropy_source =
     90         static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
     91     const std::string high_entropy_source =
     92         base::GenerateGUID() + base::IntToString(low_entropy_source);
     93     return GenerateSHA1Entropy(high_entropy_source, trial_name_);
     94   }
     95 
     96  private:
     97   std::string trial_name_;
     98 
     99   DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
    100 };
    101 
    102 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
    103 // using 13-bit low entropy source values.
    104 class PermutedEntropyGenerator : public TrialEntropyGenerator {
    105  public:
    106   explicit PermutedEntropyGenerator(const std::string& trial_name)
    107       : mapping_(kMaxLowEntropySize) {
    108     // Note: Given a trial name, the computed mapping will be the same.
    109     // As a performance optimization, pre-compute the mapping once per trial
    110     // name and index into it for each entropy value.
    111     const uint32 randomization_seed = HashName(trial_name);
    112     internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
    113                                                    &mapping_);
    114   }
    115 
    116   virtual ~PermutedEntropyGenerator() {
    117   }
    118 
    119   virtual double GenerateEntropyValue() const OVERRIDE {
    120     const int low_entropy_source =
    121         static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
    122     return mapping_[low_entropy_source] /
    123            static_cast<double>(kMaxLowEntropySize);
    124   }
    125 
    126  private:
    127   std::vector<uint16> mapping_;
    128 
    129   DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
    130 };
    131 
    132 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
    133 // of Fit Test.
    134 void PerformEntropyUniformityTest(
    135     const std::string& trial_name,
    136     const TrialEntropyGenerator& entropy_generator) {
    137   // Number of buckets in the simulated field trials.
    138   const size_t kBucketCount = 20;
    139   // Max number of iterations to perform before giving up and failing.
    140   const size_t kMaxIterationCount = 100000;
    141   // The number of iterations to perform before each time the statistical
    142   // significance of the results is checked.
    143   const size_t kCheckIterationCount = 10000;
    144   // This is the Chi-Square threshold from the Chi-Square statistic table for
    145   // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
    146   // level. See: http://www.medcalc.org/manual/chi-square-table.php
    147   const double kChiSquareThreshold = 43.82;
    148 
    149   std::vector<int> distribution(kBucketCount);
    150 
    151   for (size_t i = 1; i <= kMaxIterationCount; ++i) {
    152     const double entropy_value = entropy_generator.GenerateEntropyValue();
    153     const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
    154     ASSERT_LT(bucket, kBucketCount);
    155     distribution[bucket] += 1;
    156 
    157     // After |kCheckIterationCount| iterations, compute the Chi-Square
    158     // statistic of the distribution. If the resulting statistic is greater
    159     // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
    160     // that the observed samples do not follow a uniform distribution.
    161     //
    162     // However, since 99.9% would still result in a false negative every
    163     // 1000 runs of the test, do not treat it as a failure (else the test
    164     // will be flaky). Instead, perform additional iterations to determine
    165     // if the distribution will converge, up to |kMaxIterationCount|.
    166     if ((i % kCheckIterationCount) == 0) {
    167       const double expected_value_per_bucket =
    168           static_cast<double>(i) / kBucketCount;
    169       const double chi_square =
    170           ComputeChiSquare(distribution, expected_value_per_bucket);
    171       if (chi_square < kChiSquareThreshold)
    172         break;
    173 
    174       // If |i == kMaxIterationCount|, the Chi-Square statistic did not
    175       // converge after |kMaxIterationCount|.
    176       EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
    177           trial_name << " with chi_square = " << chi_square <<
    178           " after " << kMaxIterationCount << " iterations.";
    179     }
    180   }
    181 }
    182 
    183 }  // namespace
    184 
    185 TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
    186   // Simply asserts that two trials using one-time randomization
    187   // that have different names, normally generate different results.
    188   //
    189   // Note that depending on the one-time random initialization, they
    190   // _might_ actually give the same result, but we know that given
    191   // the particular client_id we use for unit tests they won't.
    192   base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
    193   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
    194   scoped_refptr<base::FieldTrial> trials[] = {
    195       base::FieldTrialList::FactoryGetFieldTrial(
    196           "one", 100, "default", kNoExpirationYear, 1, 1,
    197           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
    198       base::FieldTrialList::FactoryGetFieldTrial(
    199           "two", 100, "default", kNoExpirationYear, 1, 1,
    200           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
    201   };
    202 
    203   for (size_t i = 0; i < arraysize(trials); ++i) {
    204     for (int j = 0; j < 100; ++j)
    205       trials[i]->AppendGroup(std::string(), 1);
    206   }
    207 
    208   // The trials are most likely to give different results since they have
    209   // different names.
    210   EXPECT_NE(trials[0]->group(), trials[1]->group());
    211   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
    212 }
    213 
    214 TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
    215   // Simply asserts that two trials using one-time randomization
    216   // that have different names, normally generate different results.
    217   //
    218   // Note that depending on the one-time random initialization, they
    219   // _might_ actually give the same result, but we know that given
    220   // the particular client_id we use for unit tests they won't.
    221   base::FieldTrialList field_trial_list(
    222       new PermutedEntropyProvider(1234, kMaxLowEntropySize));
    223   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
    224   scoped_refptr<base::FieldTrial> trials[] = {
    225       base::FieldTrialList::FactoryGetFieldTrial(
    226           "one", 100, "default", kNoExpirationYear, 1, 1,
    227           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
    228       base::FieldTrialList::FactoryGetFieldTrial(
    229           "two", 100, "default", kNoExpirationYear, 1, 1,
    230           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
    231   };
    232 
    233   for (size_t i = 0; i < arraysize(trials); ++i) {
    234     for (int j = 0; j < 100; ++j)
    235       trials[i]->AppendGroup(std::string(), 1);
    236   }
    237 
    238   // The trials are most likely to give different results since they have
    239   // different names.
    240   EXPECT_NE(trials[0]->group(), trials[1]->group());
    241   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
    242 }
    243 
    244 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
    245   // Ensures that two trials with different names but the same custom seed used
    246   // for one time randomization produce the same group assignments.
    247   base::FieldTrialList field_trial_list(
    248       new PermutedEntropyProvider(1234, kMaxLowEntropySize));
    249   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
    250   const uint32 kCustomSeed = 9001;
    251   scoped_refptr<base::FieldTrial> trials[] = {
    252       base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
    253           "one", 100, "default", kNoExpirationYear, 1, 1,
    254           base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
    255       base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
    256           "two", 100, "default", kNoExpirationYear, 1, 1,
    257           base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
    258   };
    259 
    260   for (size_t i = 0; i < arraysize(trials); ++i) {
    261     for (int j = 0; j < 100; ++j)
    262       trials[i]->AppendGroup(std::string(), 1);
    263   }
    264 
    265   // Normally, these trials should produce different groups, but if the same
    266   // custom seed is used, they should produce the same group assignment.
    267   EXPECT_EQ(trials[0]->group(), trials[1]->group());
    268   EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
    269 }
    270 
    271 TEST(EntropyProviderTest, SHA1Entropy) {
    272   const double results[] = { GenerateSHA1Entropy("hi", "1"),
    273                              GenerateSHA1Entropy("there", "1") };
    274 
    275   EXPECT_NE(results[0], results[1]);
    276   for (size_t i = 0; i < arraysize(results); ++i) {
    277     EXPECT_LE(0.0, results[i]);
    278     EXPECT_GT(1.0, results[i]);
    279   }
    280 
    281   EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
    282             GenerateSHA1Entropy("yo", "1"));
    283   EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
    284             GenerateSHA1Entropy("yo", "else"));
    285 }
    286 
    287 TEST(EntropyProviderTest, PermutedEntropy) {
    288   const double results[] = {
    289       GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
    290       GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
    291 
    292   EXPECT_NE(results[0], results[1]);
    293   for (size_t i = 0; i < arraysize(results); ++i) {
    294     EXPECT_LE(0.0, results[i]);
    295     EXPECT_GT(1.0, results[i]);
    296   }
    297 
    298   EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
    299             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
    300   EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
    301             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
    302 }
    303 
    304 TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
    305   // Verifies that PermutedEntropyProvider produces expected results. This
    306   // ensures that the results are the same between platforms and ensures that
    307   // changes to the implementation do not regress this accidentally.
    308 
    309   EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
    310                    GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
    311   EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
    312                    GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
    313   EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
    314                    GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
    315 }
    316 
    317 TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
    318   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    319     SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
    320     PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
    321   }
    322 }
    323 
    324 TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
    325   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    326     PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
    327     PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
    328   }
    329 }
    330 
    331 TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) {
    332   // Verifies that SeededRandGenerator has a uniform distribution.
    333   //
    334   // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
    335 
    336   const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
    337   const uint32 kExpectedAverage = kTopOfRange / 2ULL;
    338   const uint32 kAllowedVariance = kExpectedAverage / 50ULL;  // +/- 2%
    339   const int kMinAttempts = 1000;
    340   const int kMaxAttempts = 1000000;
    341 
    342   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
    343     const uint32 seed = HashName(kTestTrialNames[i]);
    344     internal::SeededRandGenerator rand_generator(seed);
    345 
    346     double cumulative_average = 0.0;
    347     int count = 0;
    348     while (count < kMaxAttempts) {
    349       uint32 value = rand_generator(kTopOfRange);
    350       cumulative_average = (count * cumulative_average + value) / (count + 1);
    351 
    352       // Don't quit too quickly for things to start converging, or we may have
    353       // a false positive.
    354       if (count > kMinAttempts &&
    355           kExpectedAverage - kAllowedVariance < cumulative_average &&
    356           cumulative_average < kExpectedAverage + kAllowedVariance) {
    357         break;
    358       }
    359 
    360       ++count;
    361     }
    362 
    363     ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
    364         kExpectedAverage << ", average ended at " << cumulative_average <<
    365         ", for trial " << kTestTrialNames[i];
    366   }
    367 }
    368 
    369 }  // namespace metrics
    370