1 /* 2 * Copyright (C) 2013 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "core/frame/SmartClip.h" 33 34 #include "core/dom/ContainerNode.h" 35 #include "core/dom/Document.h" 36 #include "core/dom/NodeTraversal.h" 37 #include "core/frame/LocalDOMWindow.h" 38 #include "core/frame/FrameView.h" 39 #include "core/html/HTMLFrameOwnerElement.h" 40 #include "core/page/Page.h" 41 #include "core/rendering/RenderObject.h" 42 #include "wtf/text/StringBuilder.h" 43 44 namespace WebCore { 45 46 static IntRect applyScaleWithoutCollapsingToZero(const IntRect& rect, float scale) 47 { 48 IntRect result = rect; 49 result.scale(scale); 50 if (rect.width() > 0 && !result.width()) 51 result.setWidth(1); 52 if (rect.height() > 0 && !result.height()) 53 result.setHeight(1); 54 return result; 55 } 56 57 static Node* nodeInsideFrame(Node* node) 58 { 59 if (node->isFrameOwnerElement()) 60 return toHTMLFrameOwnerElement(node)->contentDocument(); 61 return 0; 62 } 63 64 IntRect SmartClipData::rect() const 65 { 66 return m_rect; 67 } 68 69 const String& SmartClipData::clipData() const 70 { 71 return m_string; 72 } 73 74 SmartClip::SmartClip(PassRefPtr<LocalFrame> frame) 75 : m_frame(frame) 76 { 77 } 78 79 SmartClipData SmartClip::dataForRect(const IntRect& cropRect) 80 { 81 IntRect resizedCropRect = applyScaleWithoutCollapsingToZero(cropRect, 1 / pageScaleFactor()); 82 83 Node* bestNode = findBestOverlappingNode(m_frame->document(), resizedCropRect); 84 if (!bestNode) 85 return SmartClipData(); 86 87 if (Node* nodeFromFrame = nodeInsideFrame(bestNode)) { 88 // FIXME: This code only hit-tests a single iframe. It seems like we ought support nested frames. 89 if (Node* bestNodeInFrame = findBestOverlappingNode(nodeFromFrame, resizedCropRect)) 90 bestNode = bestNodeInFrame; 91 } 92 93 WillBeHeapVector<RawPtrWillBeMember<Node> > hitNodes; 94 collectOverlappingChildNodes(bestNode, resizedCropRect, hitNodes); 95 96 if (hitNodes.isEmpty() || hitNodes.size() == bestNode->countChildren()) { 97 hitNodes.clear(); 98 hitNodes.append(bestNode); 99 } 100 101 // Unite won't work with the empty rect, so we initialize to the first rect. 102 IntRect unitedRects = hitNodes[0]->pixelSnappedBoundingBox(); 103 StringBuilder collectedText; 104 for (size_t i = 0; i < hitNodes.size(); ++i) { 105 collectedText.append(extractTextFromNode(hitNodes[i])); 106 unitedRects.unite(hitNodes[i]->pixelSnappedBoundingBox()); 107 } 108 109 return SmartClipData(bestNode, convertRectToWindow(unitedRects), collectedText.toString()); 110 } 111 112 float SmartClip::pageScaleFactor() 113 { 114 return m_frame->page()->pageScaleFactor(); 115 } 116 117 // This function is a bit of a mystery. If you understand what it does, please 118 // consider adding a more descriptive name. 119 Node* SmartClip::minNodeContainsNodes(Node* minNode, Node* newNode) 120 { 121 if (!newNode) 122 return minNode; 123 if (!minNode) 124 return newNode; 125 126 IntRect minNodeRect = minNode->pixelSnappedBoundingBox(); 127 IntRect newNodeRect = newNode->pixelSnappedBoundingBox(); 128 129 Node* parentMinNode = minNode->parentNode(); 130 Node* parentNewNode = newNode->parentNode(); 131 132 if (minNodeRect.contains(newNodeRect)) { 133 if (parentMinNode && parentNewNode && parentNewNode->parentNode() == parentMinNode) 134 return parentMinNode; 135 return minNode; 136 } 137 138 if (newNodeRect.contains(minNodeRect)) { 139 if (parentMinNode && parentNewNode && parentMinNode->parentNode() == parentNewNode) 140 return parentNewNode; 141 return newNode; 142 } 143 144 // This loop appears to find the nearest ancestor of minNode (in DOM order) 145 // that contains the newNodeRect. It's very unclear to me why that's an 146 // interesting node to find. Presumably this loop will often just return 147 // the documentElement. 148 Node* node = minNode; 149 while (node) { 150 if (node->renderer()) { 151 IntRect nodeRect = node->pixelSnappedBoundingBox(); 152 if (nodeRect.contains(newNodeRect)) { 153 return node; 154 } 155 } 156 node = node->parentNode(); 157 } 158 159 return 0; 160 } 161 162 Node* SmartClip::findBestOverlappingNode(Node* rootNode, const IntRect& cropRect) 163 { 164 if (!rootNode) 165 return 0; 166 167 IntRect resizedCropRect = rootNode->document().view()->windowToContents(cropRect); 168 169 Node* node = rootNode; 170 Node* minNode = 0; 171 172 while (node) { 173 IntRect nodeRect = node->pixelSnappedBoundingBox(); 174 175 if (node->isElementNode() && equalIgnoringCase(toElement(node)->fastGetAttribute(HTMLNames::aria_hiddenAttr), "true")) { 176 node = NodeTraversal::nextSkippingChildren(*node, rootNode); 177 continue; 178 } 179 180 RenderObject* renderer = node->renderer(); 181 if (renderer && !nodeRect.isEmpty()) { 182 if (renderer->isText() 183 || renderer->isRenderImage() 184 || node->isFrameOwnerElement() 185 || (renderer->style()->hasBackgroundImage() && !shouldSkipBackgroundImage(node))) { 186 if (resizedCropRect.intersects(nodeRect)) { 187 minNode = minNodeContainsNodes(minNode, node); 188 } else { 189 node = NodeTraversal::nextSkippingChildren(*node, rootNode); 190 continue; 191 } 192 } 193 } 194 node = NodeTraversal::next(*node, rootNode); 195 } 196 197 return minNode; 198 } 199 200 // This function appears to heuristically guess whether to include a background 201 // image in the smart clip. It seems to want to include sprites created from 202 // CSS background images but to skip actual backgrounds. 203 bool SmartClip::shouldSkipBackgroundImage(Node* node) 204 { 205 ASSERT(node); 206 // Apparently we're only interested in background images on spans and divs. 207 if (!isHTMLSpanElement(*node) && !isHTMLDivElement(*node)) 208 return true; 209 210 // This check actually makes a bit of sense. If you're going to sprite an 211 // image out of a CSS background, you're probably going to specify a height 212 // or a width. On the other hand, if we've got a legit background image, 213 // it's very likely the height or the width will be set to auto. 214 RenderObject* renderer = node->renderer(); 215 if (renderer && (renderer->style()->logicalHeight().isAuto() || renderer->style()->logicalWidth().isAuto())) 216 return true; 217 218 return false; 219 } 220 221 void SmartClip::collectOverlappingChildNodes(Node* parentNode, const IntRect& cropRect, WillBeHeapVector<RawPtrWillBeMember<Node> >& hitNodes) 222 { 223 if (!parentNode) 224 return; 225 IntRect resizedCropRect = parentNode->document().view()->windowToContents(cropRect); 226 for (Node* child = parentNode->firstChild(); child; child = child->nextSibling()) { 227 IntRect childRect = child->pixelSnappedBoundingBox(); 228 if (resizedCropRect.intersects(childRect)) 229 hitNodes.append(child); 230 } 231 } 232 233 IntRect SmartClip::convertRectToWindow(const IntRect& nodeRect) 234 { 235 IntRect result = m_frame->document()->view()->contentsToWindow(nodeRect); 236 result.scale(pageScaleFactor()); 237 return result; 238 } 239 240 String SmartClip::extractTextFromNode(Node* node) 241 { 242 // Science has proven that no text nodes are ever positioned at y == -99999. 243 int prevYPos = -99999; 244 245 StringBuilder result; 246 for (Node* currentNode = node; currentNode; currentNode = NodeTraversal::next(*currentNode, node)) { 247 RenderStyle* style = currentNode->computedStyle(); 248 if (style && style->userSelect() == SELECT_NONE) 249 continue; 250 251 if (Node* nodeFromFrame = nodeInsideFrame(currentNode)) 252 result.append(extractTextFromNode(nodeFromFrame)); 253 254 IntRect nodeRect = currentNode->pixelSnappedBoundingBox(); 255 if (currentNode->renderer() && !nodeRect.isEmpty()) { 256 if (currentNode->isTextNode()) { 257 String nodeValue = currentNode->nodeValue(); 258 259 // It's unclear why we blacklist solitary "\n" node values. 260 // Maybe we're trying to ignore <br> tags somehow? 261 if (nodeValue == "\n") 262 nodeValue = ""; 263 264 if (nodeRect.y() != prevYPos) { 265 prevYPos = nodeRect.y(); 266 result.append('\n'); 267 } 268 269 result.append(nodeValue); 270 } 271 } 272 } 273 274 return result.toString(); 275 } 276 277 } // namespace WebCore 278