1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/common/metrics/entropy_provider.h" 6 7 #include <cmath> 8 #include <limits> 9 #include <numeric> 10 11 #include "base/basictypes.h" 12 #include "base/guid.h" 13 #include "base/memory/scoped_ptr.h" 14 #include "base/rand_util.h" 15 #include "base/strings/string_number_conversions.h" 16 #include "chrome/common/metrics/metrics_util.h" 17 #include "testing/gtest/include/gtest/gtest.h" 18 19 namespace metrics { 20 21 namespace { 22 23 // Size of the low entropy source to use for the permuted entropy provider 24 // in tests. 25 const size_t kMaxLowEntropySize = 8000; 26 27 // Field trial names used in unit tests. 28 const char* const kTestTrialNames[] = { "TestTrial", "AnotherTestTrial", 29 "NewTabButton" }; 30 31 // Computes the Chi-Square statistic for |values| assuming they follow a uniform 32 // distribution, where each entry has expected value |expected_value|. 33 // 34 // The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed 35 // value and E is the expected value. 36 double ComputeChiSquare(const std::vector<int>& values, 37 double expected_value) { 38 double sum = 0; 39 for (size_t i = 0; i < values.size(); ++i) { 40 const double delta = values[i] - expected_value; 41 sum += (delta * delta) / expected_value; 42 } 43 return sum; 44 } 45 46 // Computes SHA1-based entropy for the given |trial_name| based on 47 // |entropy_source| 48 double GenerateSHA1Entropy(const std::string& entropy_source, 49 const std::string& trial_name) { 50 SHA1EntropyProvider sha1_provider(entropy_source); 51 return sha1_provider.GetEntropyForTrial(trial_name, 0); 52 } 53 54 // Generates permutation-based entropy for the given |trial_name| based on 55 // |entropy_source| which must be in the range [0, entropy_max). 56 double GeneratePermutedEntropy(uint16 entropy_source, 57 size_t entropy_max, 58 const std::string& trial_name) { 59 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max); 60 return permuted_provider.GetEntropyForTrial(trial_name, 0); 61 } 62 63 // Helper interface for testing used to generate entropy values for a given 64 // field trial. Unlike EntropyProvider, which keeps the low/high entropy source 65 // value constant and generates entropy for different trial names, instances 66 // of TrialEntropyGenerator keep the trial name constant and generate low/high 67 // entropy source values internally to produce each output entropy value. 68 class TrialEntropyGenerator { 69 public: 70 virtual ~TrialEntropyGenerator() {} 71 virtual double GenerateEntropyValue() const = 0; 72 }; 73 74 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high 75 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of 76 // entropy corresponding to a low entropy source). 77 class SHA1EntropyGenerator : public TrialEntropyGenerator { 78 public: 79 explicit SHA1EntropyGenerator(const std::string& trial_name) 80 : trial_name_(trial_name) { 81 } 82 83 virtual ~SHA1EntropyGenerator() { 84 } 85 86 virtual double GenerateEntropyValue() const OVERRIDE { 87 // Use a random GUID + 13 additional bits of entropy to match how the 88 // SHA1EntropyProvider is used in metrics_service.cc. 89 const int low_entropy_source = 90 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); 91 const std::string high_entropy_source = 92 base::GenerateGUID() + base::IntToString(low_entropy_source); 93 return GenerateSHA1Entropy(high_entropy_source, trial_name_); 94 } 95 96 private: 97 std::string trial_name_; 98 99 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator); 100 }; 101 102 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm, 103 // using 13-bit low entropy source values. 104 class PermutedEntropyGenerator : public TrialEntropyGenerator { 105 public: 106 explicit PermutedEntropyGenerator(const std::string& trial_name) 107 : mapping_(kMaxLowEntropySize) { 108 // Note: Given a trial name, the computed mapping will be the same. 109 // As a performance optimization, pre-compute the mapping once per trial 110 // name and index into it for each entropy value. 111 const uint32 randomization_seed = HashName(trial_name); 112 internal::PermuteMappingUsingRandomizationSeed(randomization_seed, 113 &mapping_); 114 } 115 116 virtual ~PermutedEntropyGenerator() { 117 } 118 119 virtual double GenerateEntropyValue() const OVERRIDE { 120 const int low_entropy_source = 121 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); 122 return mapping_[low_entropy_source] / 123 static_cast<double>(kMaxLowEntropySize); 124 } 125 126 private: 127 std::vector<uint16> mapping_; 128 129 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator); 130 }; 131 132 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness 133 // of Fit Test. 134 void PerformEntropyUniformityTest( 135 const std::string& trial_name, 136 const TrialEntropyGenerator& entropy_generator) { 137 // Number of buckets in the simulated field trials. 138 const size_t kBucketCount = 20; 139 // Max number of iterations to perform before giving up and failing. 140 const size_t kMaxIterationCount = 100000; 141 // The number of iterations to perform before each time the statistical 142 // significance of the results is checked. 143 const size_t kCheckIterationCount = 10000; 144 // This is the Chi-Square threshold from the Chi-Square statistic table for 145 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence 146 // level. See: http://www.medcalc.org/manual/chi-square-table.php 147 const double kChiSquareThreshold = 43.82; 148 149 std::vector<int> distribution(kBucketCount); 150 151 for (size_t i = 1; i <= kMaxIterationCount; ++i) { 152 const double entropy_value = entropy_generator.GenerateEntropyValue(); 153 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value); 154 ASSERT_LT(bucket, kBucketCount); 155 distribution[bucket] += 1; 156 157 // After |kCheckIterationCount| iterations, compute the Chi-Square 158 // statistic of the distribution. If the resulting statistic is greater 159 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence 160 // that the observed samples do not follow a uniform distribution. 161 // 162 // However, since 99.9% would still result in a false negative every 163 // 1000 runs of the test, do not treat it as a failure (else the test 164 // will be flaky). Instead, perform additional iterations to determine 165 // if the distribution will converge, up to |kMaxIterationCount|. 166 if ((i % kCheckIterationCount) == 0) { 167 const double expected_value_per_bucket = 168 static_cast<double>(i) / kBucketCount; 169 const double chi_square = 170 ComputeChiSquare(distribution, expected_value_per_bucket); 171 if (chi_square < kChiSquareThreshold) 172 break; 173 174 // If |i == kMaxIterationCount|, the Chi-Square statistic did not 175 // converge after |kMaxIterationCount|. 176 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " << 177 trial_name << " with chi_square = " << chi_square << 178 " after " << kMaxIterationCount << " iterations."; 179 } 180 } 181 } 182 183 } // namespace 184 185 TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) { 186 // Simply asserts that two trials using one-time randomization 187 // that have different names, normally generate different results. 188 // 189 // Note that depending on the one-time random initialization, they 190 // _might_ actually give the same result, but we know that given 191 // the particular client_id we use for unit tests they won't. 192 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id")); 193 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; 194 scoped_refptr<base::FieldTrial> trials[] = { 195 base::FieldTrialList::FactoryGetFieldTrial( 196 "one", 100, "default", kNoExpirationYear, 1, 1, 197 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), 198 base::FieldTrialList::FactoryGetFieldTrial( 199 "two", 100, "default", kNoExpirationYear, 1, 1, 200 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), 201 }; 202 203 for (size_t i = 0; i < arraysize(trials); ++i) { 204 for (int j = 0; j < 100; ++j) 205 trials[i]->AppendGroup(std::string(), 1); 206 } 207 208 // The trials are most likely to give different results since they have 209 // different names. 210 EXPECT_NE(trials[0]->group(), trials[1]->group()); 211 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name()); 212 } 213 214 TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) { 215 // Simply asserts that two trials using one-time randomization 216 // that have different names, normally generate different results. 217 // 218 // Note that depending on the one-time random initialization, they 219 // _might_ actually give the same result, but we know that given 220 // the particular client_id we use for unit tests they won't. 221 base::FieldTrialList field_trial_list( 222 new PermutedEntropyProvider(1234, kMaxLowEntropySize)); 223 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; 224 scoped_refptr<base::FieldTrial> trials[] = { 225 base::FieldTrialList::FactoryGetFieldTrial( 226 "one", 100, "default", kNoExpirationYear, 1, 1, 227 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), 228 base::FieldTrialList::FactoryGetFieldTrial( 229 "two", 100, "default", kNoExpirationYear, 1, 1, 230 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), 231 }; 232 233 for (size_t i = 0; i < arraysize(trials); ++i) { 234 for (int j = 0; j < 100; ++j) 235 trials[i]->AppendGroup(std::string(), 1); 236 } 237 238 // The trials are most likely to give different results since they have 239 // different names. 240 EXPECT_NE(trials[0]->group(), trials[1]->group()); 241 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name()); 242 } 243 244 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) { 245 // Ensures that two trials with different names but the same custom seed used 246 // for one time randomization produce the same group assignments. 247 base::FieldTrialList field_trial_list( 248 new PermutedEntropyProvider(1234, kMaxLowEntropySize)); 249 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; 250 const uint32 kCustomSeed = 9001; 251 scoped_refptr<base::FieldTrial> trials[] = { 252 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed( 253 "one", 100, "default", kNoExpirationYear, 1, 1, 254 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL), 255 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed( 256 "two", 100, "default", kNoExpirationYear, 1, 1, 257 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL), 258 }; 259 260 for (size_t i = 0; i < arraysize(trials); ++i) { 261 for (int j = 0; j < 100; ++j) 262 trials[i]->AppendGroup(std::string(), 1); 263 } 264 265 // Normally, these trials should produce different groups, but if the same 266 // custom seed is used, they should produce the same group assignment. 267 EXPECT_EQ(trials[0]->group(), trials[1]->group()); 268 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name()); 269 } 270 271 TEST(EntropyProviderTest, SHA1Entropy) { 272 const double results[] = { GenerateSHA1Entropy("hi", "1"), 273 GenerateSHA1Entropy("there", "1") }; 274 275 EXPECT_NE(results[0], results[1]); 276 for (size_t i = 0; i < arraysize(results); ++i) { 277 EXPECT_LE(0.0, results[i]); 278 EXPECT_GT(1.0, results[i]); 279 } 280 281 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"), 282 GenerateSHA1Entropy("yo", "1")); 283 EXPECT_NE(GenerateSHA1Entropy("yo", "something"), 284 GenerateSHA1Entropy("yo", "else")); 285 } 286 287 TEST(EntropyProviderTest, PermutedEntropy) { 288 const double results[] = { 289 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), 290 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") }; 291 292 EXPECT_NE(results[0], results[1]); 293 for (size_t i = 0; i < arraysize(results); ++i) { 294 EXPECT_LE(0.0, results[i]); 295 EXPECT_GT(1.0, results[i]); 296 } 297 298 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), 299 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1")); 300 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"), 301 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else")); 302 } 303 304 TEST(EntropyProviderTest, PermutedEntropyProviderResults) { 305 // Verifies that PermutedEntropyProvider produces expected results. This 306 // ensures that the results are the same between platforms and ensures that 307 // changes to the implementation do not regress this accidentally. 308 309 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize), 310 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ")); 311 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize), 312 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test")); 313 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize), 314 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo")); 315 } 316 317 TEST(EntropyProviderTest, SHA1EntropyIsUniform) { 318 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { 319 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]); 320 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator); 321 } 322 } 323 324 TEST(EntropyProviderTest, PermutedEntropyIsUniform) { 325 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { 326 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]); 327 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator); 328 } 329 } 330 331 TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) { 332 // Verifies that SeededRandGenerator has a uniform distribution. 333 // 334 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc. 335 336 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL; 337 const uint32 kExpectedAverage = kTopOfRange / 2ULL; 338 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2% 339 const int kMinAttempts = 1000; 340 const int kMaxAttempts = 1000000; 341 342 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { 343 const uint32 seed = HashName(kTestTrialNames[i]); 344 internal::SeededRandGenerator rand_generator(seed); 345 346 double cumulative_average = 0.0; 347 int count = 0; 348 while (count < kMaxAttempts) { 349 uint32 value = rand_generator(kTopOfRange); 350 cumulative_average = (count * cumulative_average + value) / (count + 1); 351 352 // Don't quit too quickly for things to start converging, or we may have 353 // a false positive. 354 if (count > kMinAttempts && 355 kExpectedAverage - kAllowedVariance < cumulative_average && 356 cumulative_average < kExpectedAverage + kAllowedVariance) { 357 break; 358 } 359 360 ++count; 361 } 362 363 ASSERT_LT(count, kMaxAttempts) << "Expected average was " << 364 kExpectedAverage << ", average ended at " << cumulative_average << 365 ", for trial " << kTestTrialNames[i]; 366 } 367 } 368 369 } // namespace metrics 370