Home | History | Annotate | Download | only in perf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/test/perf/generate_profile.h"
      6 
      7 #include "base/at_exit.h"
      8 #include "base/command_line.h"
      9 #include "base/file_util.h"
     10 #include "base/files/file_enumerator.h"
     11 #include "base/files/file_path.h"
     12 #include "base/i18n/icu_util.h"
     13 #include "base/logging.h"
     14 #include "base/message_loop/message_loop.h"
     15 #include "base/path_service.h"
     16 #include "base/strings/string_number_conversions.h"
     17 #include "base/strings/utf_string_conversions.h"
     18 #include "base/time/time.h"
     19 #include "chrome/browser/history/history_service.h"
     20 #include "chrome/browser/history/history_service_factory.h"
     21 #include "chrome/browser/history/top_sites.h"
     22 #include "chrome/common/chrome_paths.h"
     23 #include "chrome/common/thumbnail_score.h"
     24 #include "chrome/test/base/testing_browser_process.h"
     25 #include "chrome/test/base/testing_profile.h"
     26 #include "chrome/tools/profiles/thumbnail-inl.h"
     27 #include "content/public/browser/browser_thread.h"
     28 #include "content/public/browser/notification_service.h"
     29 #include "content/public/test/test_browser_thread.h"
     30 #include "third_party/skia/include/core/SkBitmap.h"
     31 #include "ui/base/resource/resource_bundle.h"
     32 #include "ui/base/ui_base_paths.h"
     33 #include "ui/gfx/codec/jpeg_codec.h"
     34 
     35 using base::Time;
     36 using content::BrowserThread;
     37 
     38 namespace {
     39 
     40 // RAII for initializing and shutting down the TestBrowserProcess
     41 class InitBrowserProcess {
     42  public:
     43   InitBrowserProcess() {
     44     DCHECK(!g_browser_process);
     45     g_browser_process = new TestingBrowserProcess;
     46   }
     47 
     48   ~InitBrowserProcess() {
     49     DCHECK(g_browser_process);
     50     delete g_browser_process;
     51     g_browser_process = NULL;
     52   }
     53 };
     54 
     55 // Probabilities of different word lengths, as measured from Darin's profile.
     56 //   kWordLengthProbabilities[n-1] = P(word of length n)
     57 const float kWordLengthProbabilities[] = { 0.069f, 0.132f, 0.199f,
     58   0.137f, 0.088f, 0.115f, 0.081f, 0.055f, 0.034f, 0.021f, 0.019f, 0.018f,
     59   0.007f, 0.007f, 0.005f, 0.004f, 0.003f, 0.003f, 0.003f };
     60 
     61 // Return a float uniformly in [0,1].
     62 // Useful for making probabilistic decisions.
     63 inline float RandomFloat() {
     64   return rand() / static_cast<float>(RAND_MAX);
     65 }
     66 
     67 // Return an integer uniformly in [min,max).
     68 inline int RandomInt(int min, int max) {
     69   return min + (rand() % (max-min));
     70 }
     71 
     72 // Return a string of |count| lowercase random characters.
     73 string16 RandomChars(int count) {
     74   string16 str;
     75   for (int i = 0; i < count; ++i)
     76     str += L'a' + rand() % 26;
     77   return str;
     78 }
     79 
     80 string16 RandomWord() {
     81   // TODO(evanm): should we instead use the markov chain based
     82   // version of this that I already wrote?
     83 
     84   // Sample a word length from kWordLengthProbabilities.
     85   float sample = RandomFloat();
     86   size_t i;
     87   for (i = 0; i < arraysize(kWordLengthProbabilities); ++i) {
     88     sample -= kWordLengthProbabilities[i];
     89     if (sample < 0) break;
     90   }
     91   const int word_length = i + 1;
     92   return RandomChars(word_length);
     93 }
     94 
     95 // Return a string of |count| random words.
     96 string16 RandomWords(int count) {
     97   string16 str;
     98   for (int i = 0; i < count; ++i) {
     99     if (!str.empty())
    100       str += L' ';
    101     str += RandomWord();
    102   }
    103   return str;
    104 }
    105 
    106 // Return a random URL-looking string.
    107 GURL ConstructRandomURL() {
    108   return GURL(ASCIIToUTF16("http://") + RandomChars(3) + ASCIIToUTF16(".com/") +
    109       RandomChars(RandomInt(5, 20)));
    110 }
    111 
    112 // Return a random page title-looking string.
    113 string16 ConstructRandomTitle() {
    114   return RandomWords(RandomInt(3, 15));
    115 }
    116 
    117 // Return a random string that could function as page contents.
    118 string16 ConstructRandomPage() {
    119   return RandomWords(RandomInt(10, 4000));
    120 }
    121 
    122 // Insert a batch of |batch_size| URLs, starting at pageid |page_id|.
    123 void InsertURLBatch(Profile* profile,
    124                     int page_id,
    125                     int batch_size,
    126                     int types) {
    127   HistoryService* history_service =
    128       HistoryServiceFactory::GetForProfile(profile, Profile::EXPLICIT_ACCESS);
    129 
    130   // Probability of following a link on the current "page"
    131   // (vs randomly jumping to a new page).
    132   const float kFollowLinkProbability = 0.85f;
    133   // Probability of visiting a page we've visited before.
    134   const float kRevisitLinkProbability = 0.1f;
    135   // Probability of a URL being "good enough" to revisit.
    136   const float kRevisitableURLProbability = 0.05f;
    137   // Probability of a URL being the end of a redirect chain.
    138   const float kRedirectProbability = 0.05f;
    139 
    140   // A list of URLs that we sometimes revisit.
    141   std::vector<GURL> revisit_urls;
    142 
    143   // Scoping value for page IDs (required by the history service).
    144   void* id_scope = reinterpret_cast<void*>(1);
    145 
    146   scoped_refptr<base::RefCountedMemory> google_bitmap(
    147       new base::RefCountedStaticMemory(kGoogleThumbnail,
    148                                        sizeof(kGoogleThumbnail)));
    149   scoped_refptr<base::RefCountedMemory> weewar_bitmap(
    150       new base::RefCountedStaticMemory(kWeewarThumbnail,
    151                                        sizeof(kWeewarThumbnail)));
    152 
    153   printf("Inserting %d URLs...\n", batch_size);
    154   GURL previous_url;
    155   content::PageTransition transition = content::PAGE_TRANSITION_TYPED;
    156   const int end_page_id = page_id + batch_size;
    157   history::TopSites* top_sites = profile->GetTopSites();
    158   for (; page_id < end_page_id; ++page_id) {
    159     // Randomly decide whether this new URL simulates following a link or
    160     // whether it's a jump to a new URL.
    161     if (!previous_url.is_empty() && RandomFloat() < kFollowLinkProbability) {
    162       transition = content::PAGE_TRANSITION_LINK;
    163     } else {
    164       previous_url = GURL();
    165       transition = content::PAGE_TRANSITION_TYPED;
    166     }
    167 
    168     // Pick a URL, either newly at random or from our list of previously
    169     // visited URLs.
    170     GURL url;
    171     if (!revisit_urls.empty() && RandomFloat() < kRevisitLinkProbability) {
    172       // Draw a URL from revisit_urls at random.
    173       url = revisit_urls[RandomInt(0, static_cast<int>(revisit_urls.size()))];
    174     } else {
    175       url = ConstructRandomURL();
    176     }
    177 
    178     // Randomly construct a redirect chain.
    179     history::RedirectList redirects;
    180     if (RandomFloat() < kRedirectProbability) {
    181       const int redir_count = RandomInt(1, 4);
    182       for (int i = 0; i < redir_count; ++i)
    183         redirects.push_back(ConstructRandomURL());
    184       redirects.push_back(url);
    185     }
    186 
    187     // Add all of this information to the history service.
    188     history_service->AddPage(url, base::Time::Now(),
    189                              id_scope, page_id,
    190                              previous_url, redirects,
    191                              transition, history::SOURCE_BROWSED, true);
    192     ThumbnailScore score(0.75, false, false);
    193     history_service->SetPageTitle(url, ConstructRandomTitle());
    194     if (types & TOP_SITES && top_sites) {
    195       top_sites->SetPageThumbnailToJPEGBytes(
    196           url,
    197           (RandomInt(0, 2) == 0) ? google_bitmap.get() : weewar_bitmap.get(),
    198           score);
    199     }
    200 
    201     previous_url = url;
    202 
    203     if (revisit_urls.empty() || RandomFloat() < kRevisitableURLProbability)
    204       revisit_urls.push_back(url);
    205   }
    206 }
    207 
    208 }  // namespace
    209 
    210 bool GenerateProfile(GenerateProfileTypes types,
    211                      int url_count,
    212                      const base::FilePath& dst_dir) {
    213   if (!file_util::CreateDirectory(dst_dir)) {
    214     PLOG(ERROR) << "Unable to create directory " << dst_dir.value().c_str();
    215     return false;
    216   }
    217 
    218   // We want this profile to be as deterministic as possible, so seed the
    219   // random number generator with the number of urls we're generating.
    220   srand(static_cast<unsigned int>(url_count));
    221 
    222   printf("Creating profiles for testing...\n");
    223 
    224   InitBrowserProcess initialize_browser_process;
    225   base::MessageLoopForUI message_loop;
    226   content::TestBrowserThread ui_thread(BrowserThread::UI, &message_loop);
    227   content::TestBrowserThread db_thread(BrowserThread::DB, &message_loop);
    228   TestingProfile profile;
    229   if (!profile.CreateHistoryService(false, false)) {
    230       PLOG(ERROR) << "Creating history service failed";
    231       return false;
    232   }
    233   if (types & TOP_SITES) {
    234     profile.CreateTopSites();
    235     profile.BlockUntilTopSitesLoaded();
    236   }
    237 
    238   // The maximum number of URLs to insert into history in one batch.
    239   const int kBatchSize = 2000;
    240   int page_id = 0;
    241   while (page_id < url_count) {
    242     const int batch_size = std::min(kBatchSize, url_count - page_id);
    243     InsertURLBatch(&profile, page_id, batch_size, types);
    244     // Run all pending messages to give TopSites a chance to catch up.
    245     message_loop.RunUntilIdle();
    246     page_id += batch_size;
    247   }
    248 
    249   profile.DestroyTopSites();
    250   profile.DestroyHistoryService();
    251 
    252   message_loop.RunUntilIdle();
    253 
    254   base::FileEnumerator file_iterator(profile.GetPath(), false,
    255                                      base::FileEnumerator::FILES);
    256   base::FilePath path = file_iterator.Next();
    257   while (!path.empty()) {
    258     base::FilePath dst_file = dst_dir.Append(path.BaseName());
    259     base::DeleteFile(dst_file, false);
    260     if (!base::CopyFile(path, dst_file)) {
    261       PLOG(ERROR) << "Copying file failed";
    262       return false;
    263     }
    264     path = file_iterator.Next();
    265   }
    266 
    267   printf("Finished creating profiles for testing.\n");
    268 
    269   // Restore the random seed.
    270   srand(static_cast<unsigned int>(Time::Now().ToInternalValue()));
    271 
    272   return true;
    273 }
    274