1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/test/perf/generate_profile.h" 6 7 #include "base/at_exit.h" 8 #include "base/command_line.h" 9 #include "base/file_util.h" 10 #include "base/files/file_enumerator.h" 11 #include "base/files/file_path.h" 12 #include "base/i18n/icu_util.h" 13 #include "base/logging.h" 14 #include "base/message_loop/message_loop.h" 15 #include "base/path_service.h" 16 #include "base/strings/string_number_conversions.h" 17 #include "base/strings/utf_string_conversions.h" 18 #include "base/time/time.h" 19 #include "chrome/browser/history/history_service.h" 20 #include "chrome/browser/history/history_service_factory.h" 21 #include "chrome/browser/history/top_sites.h" 22 #include "chrome/common/chrome_paths.h" 23 #include "chrome/common/thumbnail_score.h" 24 #include "chrome/test/base/testing_browser_process.h" 25 #include "chrome/test/base/testing_profile.h" 26 #include "chrome/tools/profiles/thumbnail-inl.h" 27 #include "content/public/browser/browser_thread.h" 28 #include "content/public/browser/notification_service.h" 29 #include "content/public/test/test_browser_thread.h" 30 #include "third_party/skia/include/core/SkBitmap.h" 31 #include "ui/base/resource/resource_bundle.h" 32 #include "ui/base/ui_base_paths.h" 33 #include "ui/gfx/codec/jpeg_codec.h" 34 35 using base::Time; 36 using content::BrowserThread; 37 38 namespace { 39 40 // RAII for initializing and shutting down the TestBrowserProcess 41 class InitBrowserProcess { 42 public: 43 InitBrowserProcess() { 44 DCHECK(!g_browser_process); 45 g_browser_process = new TestingBrowserProcess; 46 } 47 48 ~InitBrowserProcess() { 49 DCHECK(g_browser_process); 50 delete g_browser_process; 51 g_browser_process = NULL; 52 } 53 }; 54 55 // Probabilities of different word lengths, as measured from Darin's profile. 56 // kWordLengthProbabilities[n-1] = P(word of length n) 57 const float kWordLengthProbabilities[] = { 0.069f, 0.132f, 0.199f, 58 0.137f, 0.088f, 0.115f, 0.081f, 0.055f, 0.034f, 0.021f, 0.019f, 0.018f, 59 0.007f, 0.007f, 0.005f, 0.004f, 0.003f, 0.003f, 0.003f }; 60 61 // Return a float uniformly in [0,1]. 62 // Useful for making probabilistic decisions. 63 inline float RandomFloat() { 64 return rand() / static_cast<float>(RAND_MAX); 65 } 66 67 // Return an integer uniformly in [min,max). 68 inline int RandomInt(int min, int max) { 69 return min + (rand() % (max-min)); 70 } 71 72 // Return a string of |count| lowercase random characters. 73 string16 RandomChars(int count) { 74 string16 str; 75 for (int i = 0; i < count; ++i) 76 str += L'a' + rand() % 26; 77 return str; 78 } 79 80 string16 RandomWord() { 81 // TODO(evanm): should we instead use the markov chain based 82 // version of this that I already wrote? 83 84 // Sample a word length from kWordLengthProbabilities. 85 float sample = RandomFloat(); 86 size_t i; 87 for (i = 0; i < arraysize(kWordLengthProbabilities); ++i) { 88 sample -= kWordLengthProbabilities[i]; 89 if (sample < 0) break; 90 } 91 const int word_length = i + 1; 92 return RandomChars(word_length); 93 } 94 95 // Return a string of |count| random words. 96 string16 RandomWords(int count) { 97 string16 str; 98 for (int i = 0; i < count; ++i) { 99 if (!str.empty()) 100 str += L' '; 101 str += RandomWord(); 102 } 103 return str; 104 } 105 106 // Return a random URL-looking string. 107 GURL ConstructRandomURL() { 108 return GURL(ASCIIToUTF16("http://") + RandomChars(3) + ASCIIToUTF16(".com/") + 109 RandomChars(RandomInt(5, 20))); 110 } 111 112 // Return a random page title-looking string. 113 string16 ConstructRandomTitle() { 114 return RandomWords(RandomInt(3, 15)); 115 } 116 117 // Return a random string that could function as page contents. 118 string16 ConstructRandomPage() { 119 return RandomWords(RandomInt(10, 4000)); 120 } 121 122 // Insert a batch of |batch_size| URLs, starting at pageid |page_id|. 123 void InsertURLBatch(Profile* profile, 124 int page_id, 125 int batch_size, 126 int types) { 127 HistoryService* history_service = 128 HistoryServiceFactory::GetForProfile(profile, Profile::EXPLICIT_ACCESS); 129 130 // Probability of following a link on the current "page" 131 // (vs randomly jumping to a new page). 132 const float kFollowLinkProbability = 0.85f; 133 // Probability of visiting a page we've visited before. 134 const float kRevisitLinkProbability = 0.1f; 135 // Probability of a URL being "good enough" to revisit. 136 const float kRevisitableURLProbability = 0.05f; 137 // Probability of a URL being the end of a redirect chain. 138 const float kRedirectProbability = 0.05f; 139 140 // A list of URLs that we sometimes revisit. 141 std::vector<GURL> revisit_urls; 142 143 // Scoping value for page IDs (required by the history service). 144 void* id_scope = reinterpret_cast<void*>(1); 145 146 scoped_refptr<base::RefCountedMemory> google_bitmap( 147 new base::RefCountedStaticMemory(kGoogleThumbnail, 148 sizeof(kGoogleThumbnail))); 149 scoped_refptr<base::RefCountedMemory> weewar_bitmap( 150 new base::RefCountedStaticMemory(kWeewarThumbnail, 151 sizeof(kWeewarThumbnail))); 152 153 printf("Inserting %d URLs...\n", batch_size); 154 GURL previous_url; 155 content::PageTransition transition = content::PAGE_TRANSITION_TYPED; 156 const int end_page_id = page_id + batch_size; 157 history::TopSites* top_sites = profile->GetTopSites(); 158 for (; page_id < end_page_id; ++page_id) { 159 // Randomly decide whether this new URL simulates following a link or 160 // whether it's a jump to a new URL. 161 if (!previous_url.is_empty() && RandomFloat() < kFollowLinkProbability) { 162 transition = content::PAGE_TRANSITION_LINK; 163 } else { 164 previous_url = GURL(); 165 transition = content::PAGE_TRANSITION_TYPED; 166 } 167 168 // Pick a URL, either newly at random or from our list of previously 169 // visited URLs. 170 GURL url; 171 if (!revisit_urls.empty() && RandomFloat() < kRevisitLinkProbability) { 172 // Draw a URL from revisit_urls at random. 173 url = revisit_urls[RandomInt(0, static_cast<int>(revisit_urls.size()))]; 174 } else { 175 url = ConstructRandomURL(); 176 } 177 178 // Randomly construct a redirect chain. 179 history::RedirectList redirects; 180 if (RandomFloat() < kRedirectProbability) { 181 const int redir_count = RandomInt(1, 4); 182 for (int i = 0; i < redir_count; ++i) 183 redirects.push_back(ConstructRandomURL()); 184 redirects.push_back(url); 185 } 186 187 // Add all of this information to the history service. 188 history_service->AddPage(url, base::Time::Now(), 189 id_scope, page_id, 190 previous_url, redirects, 191 transition, history::SOURCE_BROWSED, true); 192 ThumbnailScore score(0.75, false, false); 193 history_service->SetPageTitle(url, ConstructRandomTitle()); 194 if (types & TOP_SITES && top_sites) { 195 top_sites->SetPageThumbnailToJPEGBytes( 196 url, 197 (RandomInt(0, 2) == 0) ? google_bitmap.get() : weewar_bitmap.get(), 198 score); 199 } 200 201 previous_url = url; 202 203 if (revisit_urls.empty() || RandomFloat() < kRevisitableURLProbability) 204 revisit_urls.push_back(url); 205 } 206 } 207 208 } // namespace 209 210 bool GenerateProfile(GenerateProfileTypes types, 211 int url_count, 212 const base::FilePath& dst_dir) { 213 if (!file_util::CreateDirectory(dst_dir)) { 214 PLOG(ERROR) << "Unable to create directory " << dst_dir.value().c_str(); 215 return false; 216 } 217 218 // We want this profile to be as deterministic as possible, so seed the 219 // random number generator with the number of urls we're generating. 220 srand(static_cast<unsigned int>(url_count)); 221 222 printf("Creating profiles for testing...\n"); 223 224 InitBrowserProcess initialize_browser_process; 225 base::MessageLoopForUI message_loop; 226 content::TestBrowserThread ui_thread(BrowserThread::UI, &message_loop); 227 content::TestBrowserThread db_thread(BrowserThread::DB, &message_loop); 228 TestingProfile profile; 229 if (!profile.CreateHistoryService(false, false)) { 230 PLOG(ERROR) << "Creating history service failed"; 231 return false; 232 } 233 if (types & TOP_SITES) { 234 profile.CreateTopSites(); 235 profile.BlockUntilTopSitesLoaded(); 236 } 237 238 // The maximum number of URLs to insert into history in one batch. 239 const int kBatchSize = 2000; 240 int page_id = 0; 241 while (page_id < url_count) { 242 const int batch_size = std::min(kBatchSize, url_count - page_id); 243 InsertURLBatch(&profile, page_id, batch_size, types); 244 // Run all pending messages to give TopSites a chance to catch up. 245 message_loop.RunUntilIdle(); 246 page_id += batch_size; 247 } 248 249 profile.DestroyTopSites(); 250 profile.DestroyHistoryService(); 251 252 message_loop.RunUntilIdle(); 253 254 base::FileEnumerator file_iterator(profile.GetPath(), false, 255 base::FileEnumerator::FILES); 256 base::FilePath path = file_iterator.Next(); 257 while (!path.empty()) { 258 base::FilePath dst_file = dst_dir.Append(path.BaseName()); 259 base::DeleteFile(dst_file, false); 260 if (!base::CopyFile(path, dst_file)) { 261 PLOG(ERROR) << "Copying file failed"; 262 return false; 263 } 264 path = file_iterator.Next(); 265 } 266 267 printf("Finished creating profiles for testing.\n"); 268 269 // Restore the random seed. 270 srand(static_cast<unsigned int>(Time::Now().ToInternalValue())); 271 272 return true; 273 } 274