Home | History | Annotate | Download | only in renderer
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/renderer/chrome_render_view_observer.h"
      6 
      7 #include "base/bind.h"
      8 #include "base/bind_helpers.h"
      9 #include "base/command_line.h"
     10 #include "base/debug/trace_event.h"
     11 #include "base/message_loop/message_loop.h"
     12 #include "base/metrics/histogram.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/strings/utf_string_conversions.h"
     15 #include "chrome/common/chrome_constants.h"
     16 #include "chrome/common/chrome_switches.h"
     17 #include "chrome/common/prerender_messages.h"
     18 #include "chrome/common/render_messages.h"
     19 #include "chrome/common/url_constants.h"
     20 #include "chrome/renderer/isolated_world_ids.h"
     21 #include "chrome/renderer/prerender/prerender_helper.h"
     22 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
     23 #include "chrome/renderer/web_apps.h"
     24 #include "chrome/renderer/webview_color_overlay.h"
     25 #include "components/translate/content/renderer/translate_helper.h"
     26 #include "components/web_cache/renderer/web_cache_render_process_observer.h"
     27 #include "content/public/common/bindings_policy.h"
     28 #include "content/public/renderer/content_renderer_client.h"
     29 #include "content/public/renderer/render_frame.h"
     30 #include "content/public/renderer/render_view.h"
     31 #include "extensions/common/constants.h"
     32 #include "extensions/renderer/extension_groups.h"
     33 #include "net/base/data_url.h"
     34 #include "skia/ext/platform_canvas.h"
     35 #include "third_party/WebKit/public/platform/WebCString.h"
     36 #include "third_party/WebKit/public/platform/WebRect.h"
     37 #include "third_party/WebKit/public/platform/WebSize.h"
     38 #include "third_party/WebKit/public/platform/WebString.h"
     39 #include "third_party/WebKit/public/platform/WebURLRequest.h"
     40 #include "third_party/WebKit/public/platform/WebVector.h"
     41 #include "third_party/WebKit/public/web/WebAXObject.h"
     42 #include "third_party/WebKit/public/web/WebDataSource.h"
     43 #include "third_party/WebKit/public/web/WebDocument.h"
     44 #include "third_party/WebKit/public/web/WebElement.h"
     45 #include "third_party/WebKit/public/web/WebInputEvent.h"
     46 #include "third_party/WebKit/public/web/WebLocalFrame.h"
     47 #include "third_party/WebKit/public/web/WebNode.h"
     48 #include "third_party/WebKit/public/web/WebNodeList.h"
     49 #include "third_party/WebKit/public/web/WebView.h"
     50 #include "ui/base/ui_base_switches_util.h"
     51 #include "ui/gfx/favicon_size.h"
     52 #include "ui/gfx/size.h"
     53 #include "ui/gfx/size_f.h"
     54 #include "ui/gfx/skbitmap_operations.h"
     55 #include "v8/include/v8-testing.h"
     56 
     57 #if defined(ENABLE_EXTENSIONS)
     58 #include "chrome/common/extensions/chrome_extension_messages.h"
     59 #endif
     60 
     61 using blink::WebAXObject;
     62 using blink::WebCString;
     63 using blink::WebDataSource;
     64 using blink::WebDocument;
     65 using blink::WebElement;
     66 using blink::WebFrame;
     67 using blink::WebGestureEvent;
     68 using blink::WebIconURL;
     69 using blink::WebLocalFrame;
     70 using blink::WebNode;
     71 using blink::WebNodeList;
     72 using blink::WebRect;
     73 using blink::WebSecurityOrigin;
     74 using blink::WebSize;
     75 using blink::WebString;
     76 using blink::WebTouchEvent;
     77 using blink::WebURL;
     78 using blink::WebURLRequest;
     79 using blink::WebView;
     80 using blink::WebVector;
     81 using blink::WebWindowFeatures;
     82 
     83 // Delay in milliseconds that we'll wait before capturing the page contents
     84 // and thumbnail.
     85 static const int kDelayForCaptureMs = 500;
     86 
     87 // Typically, we capture the page data once the page is loaded.
     88 // Sometimes, the page never finishes to load, preventing the page capture
     89 // To workaround this problem, we always perform a capture after the following
     90 // delay.
     91 static const int kDelayForForcedCaptureMs = 6000;
     92 
     93 // define to write the time necessary for thumbnail/DOM text retrieval,
     94 // respectively, into the system debug log
     95 // #define TIME_TEXT_RETRIEVAL
     96 
     97 // maximum number of characters in the document to index, any text beyond this
     98 // point will be clipped
     99 static const size_t kMaxIndexChars = 65535;
    100 
    101 // Constants for UMA statistic collection.
    102 static const char kTranslateCaptureText[] = "Translate.CaptureText";
    103 
    104 namespace {
    105 
    106 #if defined(OS_ANDROID)
    107 // Parses the DOM for a <meta> tag with a particular name.
    108 // |meta_tag_content| is set to the contents of the 'content' attribute.
    109 // |found_tag| is set to true if the tag was successfully found.
    110 // Returns true if the document was parsed without errors.
    111 bool RetrieveMetaTagContent(const WebFrame* main_frame,
    112                             const GURL& expected_url,
    113                             const std::string& meta_tag_name,
    114                             bool* found_tag,
    115                             std::string* meta_tag_content) {
    116   WebDocument document =
    117       main_frame ? main_frame->document() : WebDocument();
    118   WebElement head = document.isNull() ? WebElement() : document.head();
    119   GURL document_url = document.isNull() ? GURL() : GURL(document.url());
    120 
    121   // Search the DOM for the <meta> tag with the given name.
    122   *found_tag = false;
    123   *meta_tag_content = "";
    124   if (!head.isNull()) {
    125     WebNodeList children = head.childNodes();
    126     for (unsigned i = 0; i < children.length(); ++i) {
    127       WebNode child = children.item(i);
    128       if (!child.isElementNode())
    129         continue;
    130       WebElement elem = child.to<WebElement>();
    131       if (elem.hasHTMLTagName("meta")) {
    132         if (elem.hasAttribute("name") && elem.hasAttribute("content")) {
    133           std::string name = elem.getAttribute("name").utf8();
    134           if (name == meta_tag_name) {
    135             *meta_tag_content = elem.getAttribute("content").utf8();
    136             *found_tag = true;
    137             break;
    138           }
    139         }
    140       }
    141     }
    142   }
    143 
    144   // Make sure we're checking the right page and that the length of the content
    145   // string is reasonable.
    146   bool success = document_url == expected_url;
    147   if (meta_tag_content->size() > chrome::kMaxMetaTagAttributeLength) {
    148     *meta_tag_content = "";
    149     success = false;
    150   }
    151 
    152   return success;
    153 }
    154 #endif
    155 
    156 }  // namespace
    157 
    158 ChromeRenderViewObserver::ChromeRenderViewObserver(
    159     content::RenderView* render_view,
    160     web_cache::WebCacheRenderProcessObserver* web_cache_render_process_observer)
    161     : content::RenderViewObserver(render_view),
    162       web_cache_render_process_observer_(web_cache_render_process_observer),
    163       translate_helper_(new translate::TranslateHelper(
    164           render_view,
    165           chrome::ISOLATED_WORLD_ID_TRANSLATE,
    166           extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
    167           extensions::kExtensionScheme)),
    168       phishing_classifier_(NULL),
    169       capture_timer_(false, false) {
    170   const CommandLine& command_line = *CommandLine::ForCurrentProcess();
    171   if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection))
    172     OnSetClientSidePhishingDetection(true);
    173 }
    174 
    175 ChromeRenderViewObserver::~ChromeRenderViewObserver() {
    176 }
    177 
// Routes an incoming IPC |message| to the matching handler of this observer.
// Which handlers are part of the map depends on the platform/feature macros
// checked below. Returns true when an entry of the map matched the message
// and false when it fell through to IPC_MESSAGE_UNHANDLED.
bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) {
  // Assume the message is ours; IPC_MESSAGE_UNHANDLED resets this otherwise.
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(ChromeRenderViewObserver, message)
#if !defined(OS_ANDROID) && !defined(OS_IOS)
    // WebUI JavaScript queuing is compiled out on Android and iOS.
    IPC_MESSAGE_HANDLER(ChromeViewMsg_WebUIJavaScript, OnWebUIJavaScript)
#endif
#if defined(ENABLE_EXTENSIONS)
    IPC_MESSAGE_HANDLER(ChromeViewMsg_SetVisuallyDeemphasized,
                        OnSetVisuallyDeemphasized)
#endif
#if defined(OS_ANDROID)
    // Android-only messages.
    IPC_MESSAGE_HANDLER(ChromeViewMsg_UpdateTopControlsState,
                        OnUpdateTopControlsState)
    IPC_MESSAGE_HANDLER(ChromeViewMsg_RetrieveMetaTagContent,
                        OnRetrieveMetaTagContent)
#endif
    // Handlers registered on every platform.
    IPC_MESSAGE_HANDLER(ChromeViewMsg_GetWebApplicationInfo,
                        OnGetWebApplicationInfo)
    IPC_MESSAGE_HANDLER(ChromeViewMsg_SetClientSidePhishingDetection,
                        OnSetClientSidePhishingDetection)
    IPC_MESSAGE_HANDLER(ChromeViewMsg_SetWindowFeatures, OnSetWindowFeatures)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()

  return handled;
}
    204 
    205 #if !defined(OS_ANDROID) && !defined(OS_IOS)
// Queues |javascript| in webui_javascript_ instead of running it right away.
// The queued scripts are executed, and the queue cleared, by
// DidStartLoading() when the view has WebUI bindings enabled.
void ChromeRenderViewObserver::OnWebUIJavaScript(
    const base::string16& javascript) {
  webui_javascript_.push_back(javascript);
}
    210 #endif
    211 
    212 #if defined(OS_ANDROID)
    213 void ChromeRenderViewObserver::OnUpdateTopControlsState(
    214     content::TopControlsState constraints,
    215     content::TopControlsState current,
    216     bool animate) {
    217   render_view()->UpdateTopControlsState(constraints, current, animate);
    218 }
    219 
    220 void ChromeRenderViewObserver::OnRetrieveMetaTagContent(
    221     const GURL& expected_url,
    222     const std::string tag_name) {
    223   bool found_tag;
    224   std::string content_str;
    225   bool parsed_successfully = RetrieveMetaTagContent(
    226       render_view()->GetWebView()->mainFrame(),
    227       expected_url,
    228       tag_name,
    229       &found_tag,
    230       &content_str);
    231 
    232   Send(new ChromeViewHostMsg_DidRetrieveMetaTagContent(
    233       routing_id(),
    234       parsed_successfully && found_tag,
    235       tag_name,
    236       content_str,
    237       expected_url));
    238 }
    239 #endif
    240 
    241 void ChromeRenderViewObserver::OnGetWebApplicationInfo() {
    242   WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
    243   DCHECK(main_frame);
    244 
    245   WebApplicationInfo web_app_info;
    246   web_apps::ParseWebAppFromWebDocument(main_frame, &web_app_info);
    247 
    248   // The warning below is specific to mobile but it doesn't hurt to show it even
    249   // if the Chromium build is running on a desktop. It will get more exposition.
    250   if (web_app_info.mobile_capable ==
    251         WebApplicationInfo::MOBILE_CAPABLE_APPLE) {
    252     blink::WebConsoleMessage message(
    253         blink::WebConsoleMessage::LevelWarning,
    254         "<meta name=\"apple-mobile-web-app-capable\" content=\"yes\"> is "
    255         "deprecated. Please include <meta name=\"mobile-web-app-capable\" "
    256         "content=\"yes\"> - "
    257         "http://developers.google.com/chrome/mobile/docs/installtohomescreen");
    258     main_frame->addMessageToConsole(message);
    259   }
    260 
    261   // Prune out any data URLs in the set of icons.  The browser process expects
    262   // any icon with a data URL to have originated from a favicon.  We don't want
    263   // to decode arbitrary data URLs in the browser process.  See
    264   // http://b/issue?id=1162972
    265   for (std::vector<WebApplicationInfo::IconInfo>::iterator it =
    266           web_app_info.icons.begin(); it != web_app_info.icons.end();) {
    267     if (it->url.SchemeIs(url::kDataScheme))
    268       it = web_app_info.icons.erase(it);
    269     else
    270       ++it;
    271   }
    272 
    273   // Truncate the strings we send to the browser process.
    274   web_app_info.title =
    275       web_app_info.title.substr(0, chrome::kMaxMetaTagAttributeLength);
    276   web_app_info.description =
    277       web_app_info.description.substr(0, chrome::kMaxMetaTagAttributeLength);
    278 
    279   Send(new ChromeViewHostMsg_DidGetWebApplicationInfo(
    280       routing_id(), web_app_info));
    281 }
    282 
    283 void ChromeRenderViewObserver::OnSetWindowFeatures(
    284     const WebWindowFeatures& window_features) {
    285   render_view()->GetWebView()->setWindowFeatures(window_features);
    286 }
    287 
    288 void ChromeRenderViewObserver::Navigate(const GURL& url) {
    289   // Execute cache clear operations that were postponed until a navigation
    290   // event (including tab reload).
    291   if (web_cache_render_process_observer_)
    292     web_cache_render_process_observer_->ExecutePendingClearCache();
    293   // Let translate_helper do any preparatory work for loading a URL.
    294   if (translate_helper_)
    295     translate_helper_->PrepareForUrl(url);
    296 }
    297 
    298 void ChromeRenderViewObserver::OnSetClientSidePhishingDetection(
    299     bool enable_phishing_detection) {
    300 #if defined(FULL_SAFE_BROWSING) && !defined(OS_CHROMEOS)
    301   phishing_classifier_ = enable_phishing_detection ?
    302       safe_browsing::PhishingClassifierDelegate::Create(render_view(), NULL) :
    303       NULL;
    304 #endif
    305 }
    306 
    307 #if defined(ENABLE_EXTENSIONS)
    308 void ChromeRenderViewObserver::OnSetVisuallyDeemphasized(bool deemphasized) {
    309   bool already_deemphasized = !!dimmed_color_overlay_.get();
    310   if (already_deemphasized == deemphasized)
    311     return;
    312 
    313   if (deemphasized) {
    314     // 70% opaque grey.
    315     SkColor greyish = SkColorSetARGB(178, 0, 0, 0);
    316     dimmed_color_overlay_.reset(
    317         new WebViewColorOverlay(render_view(), greyish));
    318   } else {
    319     dimmed_color_overlay_.reset();
    320   }
    321 }
    322 #endif
    323 
    324 void ChromeRenderViewObserver::DidStartLoading() {
    325   if ((render_view()->GetEnabledBindings() & content::BINDINGS_POLICY_WEB_UI) &&
    326       !webui_javascript_.empty()) {
    327     for (size_t i = 0; i < webui_javascript_.size(); ++i) {
    328       render_view()->GetMainRenderFrame()->ExecuteJavaScript(
    329           webui_javascript_[i]);
    330     }
    331     webui_javascript_.clear();
    332   }
    333 }
    334 
    335 void ChromeRenderViewObserver::DidStopLoading() {
    336   WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
    337   GURL osdd_url = main_frame->document().openSearchDescriptionURL();
    338   if (!osdd_url.is_empty()) {
    339     Send(new ChromeViewHostMsg_PageHasOSDD(
    340         routing_id(), main_frame->document().url(), osdd_url,
    341         search_provider::AUTODETECTED_PROVIDER));
    342   }
    343 
    344   // Don't capture pages including refresh meta tag.
    345   if (HasRefreshMetaTag(main_frame))
    346     return;
    347 
    348   CapturePageInfoLater(
    349       false,  // preliminary_capture
    350       base::TimeDelta::FromMilliseconds(
    351           render_view()->GetContentStateImmediately() ?
    352               0 : kDelayForCaptureMs));
    353 }
    354 
    355 void ChromeRenderViewObserver::DidCommitProvisionalLoad(
    356     WebLocalFrame* frame, bool is_new_navigation) {
    357   // Don't capture pages being not new, or including refresh meta tag.
    358   if (!is_new_navigation || HasRefreshMetaTag(frame))
    359     return;
    360 
    361   CapturePageInfoLater(
    362       true,  // preliminary_capture
    363       base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs));
    364 }
    365 
    366 void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture,
    367                                                     base::TimeDelta delay) {
    368   capture_timer_.Start(
    369       FROM_HERE,
    370       delay,
    371       base::Bind(&ChromeRenderViewObserver::CapturePageInfo,
    372                  base::Unretained(this),
    373                  preliminary_capture));
    374 }
    375 
    376 void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) {
    377   if (!render_view()->GetWebView())
    378     return;
    379 
    380   WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
    381   if (!main_frame)
    382     return;
    383 
    384   // Don't index/capture pages that are in view source mode.
    385   if (main_frame->isViewSourceModeEnabled())
    386     return;
    387 
    388   // Don't index/capture pages that failed to load.  This only checks the top
    389   // level frame so the thumbnail may contain a frame that failed to load.
    390   WebDataSource* ds = main_frame->dataSource();
    391   if (ds && ds->hasUnreachableURL())
    392     return;
    393 
    394   // Don't index/capture pages that are being prerendered.
    395   if (prerender::PrerenderHelper::IsPrerendering(
    396           render_view()->GetMainRenderFrame())) {
    397     return;
    398   }
    399 
    400   // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the
    401   // translate helper for language detection and possible translation.
    402   base::string16 contents;
    403   base::TimeTicks capture_begin_time = base::TimeTicks::Now();
    404   CaptureText(main_frame, &contents);
    405   UMA_HISTOGRAM_TIMES(kTranslateCaptureText,
    406                       base::TimeTicks::Now() - capture_begin_time);
    407   if (translate_helper_)
    408     translate_helper_->PageCaptured(contents);
    409 
    410   TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");
    411 
    412 #if defined(FULL_SAFE_BROWSING)
    413   // Will swap out the string.
    414   if (phishing_classifier_)
    415     phishing_classifier_->PageCaptured(&contents, preliminary_capture);
    416 #endif
    417 }
    418 
    419 void ChromeRenderViewObserver::CaptureText(WebFrame* frame,
    420                                            base::string16* contents) {
    421   contents->clear();
    422   if (!frame)
    423     return;
    424 
    425 #ifdef TIME_TEXT_RETRIEVAL
    426   double begin = time_util::GetHighResolutionTimeNow();
    427 #endif
    428 
    429   // get the contents of the frame
    430   *contents = frame->contentAsText(kMaxIndexChars);
    431 
    432 #ifdef TIME_TEXT_RETRIEVAL
    433   double end = time_util::GetHighResolutionTimeNow();
    434   char buf[128];
    435   sprintf_s(buf, "%d chars retrieved for indexing in %gms\n",
    436             contents.size(), (end - begin)*1000);
    437   OutputDebugStringA(buf);
    438 #endif
    439 
    440   // When the contents are clipped to the maximum, we don't want to have a
    441   // partial word indexed at the end that might have been clipped. Therefore,
    442   // terminate the string at the last space to ensure no words are clipped.
    443   if (contents->size() == kMaxIndexChars) {
    444     size_t last_space_index = contents->find_last_of(base::kWhitespaceUTF16);
    445     if (last_space_index != base::string16::npos)
    446       contents->resize(last_space_index);
    447   }
    448 }
    449 
    450 bool ChromeRenderViewObserver::HasRefreshMetaTag(WebFrame* frame) {
    451   if (!frame)
    452     return false;
    453   WebElement head = frame->document().head();
    454   if (head.isNull() || !head.hasChildNodes())
    455     return false;
    456 
    457   const WebString tag_name(base::ASCIIToUTF16("meta"));
    458   const WebString attribute_name(base::ASCIIToUTF16("http-equiv"));
    459 
    460   WebNodeList children = head.childNodes();
    461   for (size_t i = 0; i < children.length(); ++i) {
    462     WebNode node = children.item(i);
    463     if (!node.isElementNode())
    464       continue;
    465     WebElement element = node.to<WebElement>();
    466     if (!element.hasHTMLTagName(tag_name))
    467       continue;
    468     WebString value = element.getAttribute(attribute_name);
    469     if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh"))
    470       continue;
    471     return true;
    472   }
    473   return false;
    474 }
    475