1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/memory/weak_ptr.h" 6 #include "base/path_service.h" 7 #include "base/run_loop.h" 8 #include "base/values.h" 9 #include "components/dom_distiller/content/distiller_page_web_contents.h" 10 #include "components/dom_distiller/content/web_contents_main_frame_observer.h" 11 #include "components/dom_distiller/core/distiller_page.h" 12 #include "components/dom_distiller/core/proto/distilled_article.pb.h" 13 #include "components/dom_distiller/core/proto/distilled_page.pb.h" 14 #include "components/dom_distiller/core/viewer.h" 15 #include "content/public/browser/browser_context.h" 16 #include "content/public/browser/navigation_controller.h" 17 #include "content/public/browser/render_frame_host.h" 18 #include "content/public/browser/web_contents_observer.h" 19 #include "content/public/test/content_browser_test.h" 20 #include "content/shell/browser/shell.h" 21 #include "grit/components_strings.h" 22 #include "net/test/embedded_test_server/embedded_test_server.h" 23 #include "testing/gmock/include/gmock/gmock.h" 24 #include "third_party/dom_distiller_js/dom_distiller.pb.h" 25 #include "ui/base/l10n/l10n_util.h" 26 #include "ui/base/resource/resource_bundle.h" 27 28 using content::ContentBrowserTest; 29 using testing::ContainsRegex; 30 using testing::HasSubstr; 31 using testing::Not; 32 33 namespace dom_distiller { 34 35 const char* kSimpleArticlePath = "/simple_article.html"; 36 const char* kVideoArticlePath = "/video_article.html"; 37 38 class DistillerPageWebContentsTest : public ContentBrowserTest { 39 public: 40 // ContentBrowserTest: 41 virtual void SetUpOnMainThread() OVERRIDE { 42 AddComponentsResources(); 43 SetUpTestServer(); 44 ContentBrowserTest::SetUpOnMainThread(); 45 } 46 47 void DistillPage(const base::Closure& quit_closure, const std::string& url) { 48 quit_closure_ = quit_closure; 49 distiller_page_->DistillPage( 50 embedded_test_server()->GetURL(url), 51 dom_distiller::proto::DomDistillerOptions(), 52 base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished, 53 this)); 54 } 55 56 void OnPageDistillationFinished( 57 scoped_ptr<proto::DomDistillerResult> distiller_result, 58 bool distillation_successful) { 59 distiller_result_ = distiller_result.Pass(); 60 quit_closure_.Run(); 61 } 62 63 private: 64 void AddComponentsResources() { 65 base::FilePath pak_file; 66 base::FilePath pak_dir; 67 PathService::Get(base::DIR_MODULE, &pak_dir); 68 pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak")); 69 ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath( 70 pak_file, ui::SCALE_FACTOR_NONE); 71 } 72 73 void SetUpTestServer() { 74 base::FilePath path; 75 PathService::Get(base::DIR_SOURCE_ROOT, &path); 76 path = path.AppendASCII("components/test/data/dom_distiller"); 77 embedded_test_server()->ServeFilesFromDirectory(path); 78 ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady()); 79 } 80 81 protected: 82 void RunUseCurrentWebContentsTest(const std::string& url, 83 bool expect_new_web_contents, 84 bool setup_main_frame_observer, 85 bool wait_for_document_loaded); 86 87 DistillerPageWebContents* distiller_page_; 88 base::Closure quit_closure_; 89 scoped_ptr<proto::DomDistillerResult> distiller_result_; 90 }; 91 92 // Use this class to be able to leak the WebContents, which is needed for when 93 // the current WebContents is used for distillation. 94 class TestDistillerPageWebContents : public DistillerPageWebContents { 95 public: 96 TestDistillerPageWebContents( 97 content::BrowserContext* browser_context, 98 const gfx::Size& render_view_size, 99 scoped_ptr<SourcePageHandleWebContents> optional_web_contents_handle, 100 bool expect_new_web_contents) 101 : DistillerPageWebContents(browser_context, render_view_size, 102 optional_web_contents_handle.Pass()), 103 expect_new_web_contents_(expect_new_web_contents), 104 new_web_contents_created_(false) {} 105 106 virtual void CreateNewWebContents(const GURL& url) OVERRIDE { 107 ASSERT_EQ(true, expect_new_web_contents_); 108 new_web_contents_created_ = true; 109 // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to 110 // the WebContents, so intentionally leak WebContents here, since it is 111 // owned by the shell. 112 content::WebContents* web_contents = web_contents_.release(); 113 web_contents->GetLastCommittedURL(); 114 DistillerPageWebContents::CreateNewWebContents(url); 115 } 116 117 virtual ~TestDistillerPageWebContents() { 118 if (!expect_new_web_contents_) { 119 // Intentionally leaking WebContents, since it is owned by the shell. 120 content::WebContents* web_contents = web_contents_.release(); 121 web_contents->GetLastCommittedURL(); 122 } 123 } 124 125 bool new_web_contents_created() { return new_web_contents_created_; } 126 127 private: 128 bool expect_new_web_contents_; 129 bool new_web_contents_created_; 130 }; 131 132 // Helper class to know how far in the loading process the current WebContents 133 // has come. It will call the callback either after 134 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the 135 // main frame, based on the value of |wait_for_document_loaded|. 136 class WebContentsMainFrameHelper : public content::WebContentsObserver { 137 public: 138 WebContentsMainFrameHelper(content::WebContents* web_contents, 139 const base::Closure& callback, 140 bool wait_for_document_loaded) 141 : WebContentsObserver(web_contents), 142 callback_(callback), 143 wait_for_document_loaded_(wait_for_document_loaded) {} 144 145 virtual void DidCommitProvisionalLoadForFrame( 146 content::RenderFrameHost* render_frame_host, 147 const GURL& url, 148 ui::PageTransition transition_type) OVERRIDE { 149 if (wait_for_document_loaded_) 150 return; 151 if (!render_frame_host->GetParent()) 152 callback_.Run(); 153 } 154 155 virtual void DocumentLoadedInFrame( 156 content::RenderFrameHost* render_frame_host) OVERRIDE { 157 if (wait_for_document_loaded_) { 158 if (!render_frame_host->GetParent()) 159 callback_.Run(); 160 } 161 } 162 163 private: 164 base::Closure callback_; 165 bool wait_for_document_loaded_; 166 }; 167 168 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) { 169 DistillerPageWebContents distiller_page( 170 shell()->web_contents()->GetBrowserContext(), 171 shell()->web_contents()->GetContainerBounds().size(), 172 scoped_ptr<SourcePageHandleWebContents>()); 173 distiller_page_ = &distiller_page; 174 175 base::RunLoop run_loop; 176 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath); 177 run_loop.Run(); 178 179 EXPECT_EQ("Test Page Title", distiller_result_->title()); 180 EXPECT_THAT(distiller_result_->distilled_content().html(), 181 HasSubstr("Lorem ipsum")); 182 EXPECT_THAT(distiller_result_->distilled_content().html(), 183 Not(HasSubstr("questionable content"))); 184 EXPECT_EQ("", distiller_result_->pagination_info().next_page()); 185 EXPECT_EQ("", distiller_result_->pagination_info().prev_page()); 186 } 187 188 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) { 189 DistillerPageWebContents distiller_page( 190 shell()->web_contents()->GetBrowserContext(), 191 shell()->web_contents()->GetContainerBounds().size(), 192 scoped_ptr<SourcePageHandleWebContents>()); 193 distiller_page_ = &distiller_page; 194 195 base::RunLoop run_loop; 196 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath); 197 run_loop.Run(); 198 199 // A relative link should've been updated. 200 EXPECT_THAT(distiller_result_->distilled_content().html(), 201 ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\"")); 202 EXPECT_THAT(distiller_result_->distilled_content().html(), 203 HasSubstr("href=\"http://www.google.com/absolutelink.html\"")); 204 } 205 206 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) { 207 DistillerPageWebContents distiller_page( 208 shell()->web_contents()->GetBrowserContext(), 209 shell()->web_contents()->GetContainerBounds().size(), 210 scoped_ptr<SourcePageHandleWebContents>()); 211 distiller_page_ = &distiller_page; 212 213 base::RunLoop run_loop; 214 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath); 215 run_loop.Run(); 216 217 // A relative link should've been updated. 218 EXPECT_THAT(distiller_result_->distilled_content().html(), 219 ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\"")); 220 EXPECT_THAT(distiller_result_->distilled_content().html(), 221 HasSubstr("src=\"http://www.google.com/absoluteimage.png\"")); 222 } 223 224 225 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeVideos) { 226 DistillerPageWebContents distiller_page( 227 shell()->web_contents()->GetBrowserContext(), 228 shell()->web_contents()->GetContainerBounds().size(), 229 scoped_ptr<SourcePageHandleWebContents>()); 230 distiller_page_ = &distiller_page; 231 232 base::RunLoop run_loop; 233 DistillPage(run_loop.QuitClosure(), kVideoArticlePath); 234 run_loop.Run(); 235 236 // A relative source/track should've been updated. 237 EXPECT_THAT(distiller_result_->distilled_content().html(), 238 ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\"")); 239 EXPECT_THAT( 240 distiller_result_->distilled_content().html(), 241 ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\"")); 242 EXPECT_THAT(distiller_result_->distilled_content().html(), 243 HasSubstr("src=\"http://www.google.com/absolute_video.ogg\"")); 244 EXPECT_THAT(distiller_result_->distilled_content().html(), 245 HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\"")); 246 } 247 248 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) { 249 DistillerPageWebContents distiller_page( 250 shell()->web_contents()->GetBrowserContext(), 251 shell()->web_contents()->GetContainerBounds().size(), 252 scoped_ptr<SourcePageHandleWebContents>()); 253 distiller_page_ = &distiller_page; 254 255 // visble_style.html and invisible_style.html only differ by the visibility 256 // internal stylesheet. 257 258 { 259 base::RunLoop run_loop; 260 DistillPage(run_loop.QuitClosure(), "/visible_style.html"); 261 run_loop.Run(); 262 EXPECT_THAT(distiller_result_->distilled_content().html(), 263 HasSubstr("Lorem ipsum")); 264 } 265 266 { 267 base::RunLoop run_loop; 268 DistillPage(run_loop.QuitClosure(), "/invisible_style.html"); 269 run_loop.Run(); 270 EXPECT_THAT(distiller_result_->distilled_content().html(), 271 Not(HasSubstr("Lorem ipsum"))); 272 } 273 } 274 275 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, 276 UsingCurrentWebContentsWrongUrl) { 277 std::string url("/bogus"); 278 bool expect_new_web_contents = true; 279 bool setup_main_frame_observer = true; 280 bool wait_for_document_loaded = true; 281 RunUseCurrentWebContentsTest(url, 282 expect_new_web_contents, 283 setup_main_frame_observer, 284 wait_for_document_loaded); 285 } 286 287 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, 288 UsingCurrentWebContentsNoMainFrameObserver) { 289 std::string url(kSimpleArticlePath); 290 bool expect_new_web_contents = true; 291 bool setup_main_frame_observer = false; 292 bool wait_for_document_loaded = true; 293 RunUseCurrentWebContentsTest(url, 294 expect_new_web_contents, 295 setup_main_frame_observer, 296 wait_for_document_loaded); 297 } 298 299 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, 300 UsingCurrentWebContentsNotFinishedLoadingYet) { 301 std::string url(kSimpleArticlePath); 302 bool expect_new_web_contents = false; 303 bool setup_main_frame_observer = true; 304 bool wait_for_document_loaded = false; 305 RunUseCurrentWebContentsTest(url, 306 expect_new_web_contents, 307 setup_main_frame_observer, 308 wait_for_document_loaded); 309 } 310 311 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, 312 UsingCurrentWebContentsReadyForDistillation) { 313 std::string url(kSimpleArticlePath); 314 bool expect_new_web_contents = false; 315 bool setup_main_frame_observer = true; 316 bool wait_for_document_loaded = true; 317 RunUseCurrentWebContentsTest(url, 318 expect_new_web_contents, 319 setup_main_frame_observer, 320 wait_for_document_loaded); 321 } 322 323 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest( 324 const std::string& url, 325 bool expect_new_web_contents, 326 bool setup_main_frame_observer, 327 bool wait_for_document_loaded) { 328 content::WebContents* current_web_contents = shell()->web_contents(); 329 if (setup_main_frame_observer) { 330 dom_distiller::WebContentsMainFrameObserver::CreateForWebContents( 331 current_web_contents); 332 } 333 base::RunLoop url_loaded_runner; 334 WebContentsMainFrameHelper main_frame_loaded(current_web_contents, 335 url_loaded_runner.QuitClosure(), 336 wait_for_document_loaded); 337 current_web_contents->GetController().LoadURL( 338 embedded_test_server()->GetURL(url), 339 content::Referrer(), 340 ui::PAGE_TRANSITION_TYPED, 341 std::string()); 342 url_loaded_runner.Run(); 343 344 scoped_ptr<content::WebContents> old_web_contents_sptr(current_web_contents); 345 scoped_ptr<SourcePageHandleWebContents> source_page_handle( 346 new SourcePageHandleWebContents(old_web_contents_sptr.Pass())); 347 348 TestDistillerPageWebContents distiller_page( 349 shell()->web_contents()->GetBrowserContext(), 350 shell()->web_contents()->GetContainerBounds().size(), 351 source_page_handle.Pass(), 352 expect_new_web_contents); 353 distiller_page_ = &distiller_page; 354 355 base::RunLoop run_loop; 356 DistillPage(run_loop.QuitClosure(), kSimpleArticlePath); 357 run_loop.Run(); 358 359 // Sanity check of distillation process. 360 EXPECT_EQ(expect_new_web_contents, distiller_page.new_web_contents_created()); 361 EXPECT_EQ("Test Page Title", distiller_result_->title()); 362 } 363 364 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) { 365 DistillerPageWebContents distiller_page( 366 shell()->web_contents()->GetBrowserContext(), 367 shell()->web_contents()->GetContainerBounds().size(), 368 scoped_ptr<SourcePageHandleWebContents>()); 369 distiller_page_ = &distiller_page; 370 371 base::RunLoop run_loop; 372 DistillPage(run_loop.QuitClosure(), "/markup_article.html"); 373 run_loop.Run(); 374 375 EXPECT_THAT(distiller_result_->distilled_content().html(), 376 HasSubstr("Lorem ipsum")); 377 EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_->title()); 378 379 const proto::MarkupInfo markup_info = distiller_result_->markup_info(); 380 EXPECT_EQ("Marked-up Markup Test Page Title", markup_info.title()); 381 EXPECT_EQ("Article", markup_info.type()); 382 EXPECT_EQ("http://test/markup.html", markup_info.url()); 383 EXPECT_EQ("This page tests Markup Info.", markup_info.description()); 384 EXPECT_EQ("Whoever Published", markup_info.publisher()); 385 EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info.copyright()); 386 EXPECT_EQ("Whoever Authored", markup_info.author()); 387 388 const proto::MarkupArticle markup_article = markup_info.article(); 389 EXPECT_EQ("Whatever Section", markup_article.section()); 390 EXPECT_EQ("July 23, 2014", markup_article.published_time()); 391 EXPECT_EQ("2014-07-23T23:59", markup_article.modified_time()); 392 EXPECT_EQ("", markup_article.expiration_time()); 393 ASSERT_EQ(1, markup_article.authors_size()); 394 EXPECT_EQ("Whoever Authored", markup_article.authors(0)); 395 396 ASSERT_EQ(2, markup_info.images_size()); 397 398 const proto::MarkupImage markup_image1 = markup_info.images(0); 399 EXPECT_EQ("http://test/markup1.jpeg", markup_image1.url()); 400 EXPECT_EQ("https://test/markup1.jpeg", markup_image1.secure_url()); 401 EXPECT_EQ("jpeg", markup_image1.type()); 402 EXPECT_EQ("", markup_image1.caption()); 403 EXPECT_EQ(600, markup_image1.width()); 404 EXPECT_EQ(400, markup_image1.height()); 405 406 const proto::MarkupImage markup_image2 = markup_info.images(1); 407 EXPECT_EQ("http://test/markup2.gif", markup_image2.url()); 408 EXPECT_EQ("https://test/markup2.gif", markup_image2.secure_url()); 409 EXPECT_EQ("gif", markup_image2.type()); 410 EXPECT_EQ("", markup_image2.caption()); 411 EXPECT_EQ(1000, markup_image2.width()); 412 EXPECT_EQ(600, markup_image2.height()); 413 } 414 415 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, 416 TestTitleAndContentAreNeverEmpty) { 417 const std::string some_title = "some title"; 418 const std::string some_content = "some content"; 419 const std::string no_title = 420 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE); 421 const std::string no_content = 422 l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_CONTENT); 423 424 { // Test non-empty title and content for article. 425 scoped_ptr<DistilledArticleProto> article_proto( 426 new DistilledArticleProto()); 427 article_proto->set_title(some_title); 428 (*(article_proto->add_pages())).set_html(some_content); 429 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(), 430 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 431 EXPECT_THAT(html, HasSubstr(some_title)); 432 EXPECT_THAT(html, HasSubstr(some_content)); 433 EXPECT_THAT(html, Not(HasSubstr(no_title))); 434 EXPECT_THAT(html, Not(HasSubstr(no_content))); 435 } 436 437 { // Test empty title and content for article. 438 scoped_ptr<DistilledArticleProto> article_proto( 439 new DistilledArticleProto()); 440 article_proto->set_title(""); 441 (*(article_proto->add_pages())).set_html(""); 442 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(), 443 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 444 EXPECT_THAT(html, HasSubstr(no_title)); 445 EXPECT_THAT(html, HasSubstr(no_content)); 446 EXPECT_THAT(html, Not(HasSubstr(some_title))); 447 EXPECT_THAT(html, Not(HasSubstr(some_content))); 448 } 449 450 { // Test missing title and non-empty content for article. 451 scoped_ptr<DistilledArticleProto> article_proto( 452 new DistilledArticleProto()); 453 (*(article_proto->add_pages())).set_html(some_content); 454 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(), 455 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 456 EXPECT_THAT(html, HasSubstr(no_title)); 457 EXPECT_THAT(html, HasSubstr(no_content)); 458 EXPECT_THAT(html, Not(HasSubstr(some_title))); 459 EXPECT_THAT(html, Not(HasSubstr(some_content))); 460 } 461 462 { // Test non-empty title and missing content for article. 463 scoped_ptr<DistilledArticleProto> article_proto( 464 new DistilledArticleProto()); 465 article_proto->set_title(some_title); 466 std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(), 467 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 468 EXPECT_THAT(html, HasSubstr(no_title)); 469 EXPECT_THAT(html, HasSubstr(no_content)); 470 EXPECT_THAT(html, Not(HasSubstr(some_title))); 471 EXPECT_THAT(html, Not(HasSubstr(some_content))); 472 } 473 474 { // Test non-empty title and content for page. 475 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto()); 476 page_proto->set_title(some_title); 477 page_proto->set_html(some_content); 478 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(), 479 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 480 EXPECT_THAT(html, HasSubstr(some_title)); 481 EXPECT_THAT(html, HasSubstr(some_content)); 482 EXPECT_THAT(html, Not(HasSubstr(no_title))); 483 EXPECT_THAT(html, Not(HasSubstr(no_content))); 484 } 485 486 { // Test empty title and content for page. 487 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto()); 488 page_proto->set_title(""); 489 page_proto->set_html(""); 490 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(), 491 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 492 EXPECT_THAT(html, HasSubstr(no_title)); 493 EXPECT_THAT(html, HasSubstr(no_content)); 494 EXPECT_THAT(html, Not(HasSubstr(some_title))); 495 EXPECT_THAT(html, Not(HasSubstr(some_content))); 496 } 497 498 { // Test missing title and non-empty content for page. 499 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto()); 500 page_proto->set_html(some_content); 501 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(), 502 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 503 EXPECT_THAT(html, HasSubstr(no_title)); 504 EXPECT_THAT(html, HasSubstr(some_content)); 505 EXPECT_THAT(html, Not(HasSubstr(some_title))); 506 EXPECT_THAT(html, Not(HasSubstr(no_content))); 507 } 508 509 { // Test non-empty title and missing content for page. 510 scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto()); 511 page_proto->set_title(some_title); 512 std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(), 513 DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF); 514 EXPECT_THAT(html, HasSubstr(some_title)); 515 EXPECT_THAT(html, HasSubstr(no_content)); 516 EXPECT_THAT(html, Not(HasSubstr(no_title))); 517 EXPECT_THAT(html, Not(HasSubstr(some_content))); 518 } 519 } 520 521 } // namespace dom_distiller 522