1 /* 2 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 20 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 */ 24 25 #include "config.h" 26 #include "TextDocument.h" 27 28 #include "Element.h" 29 #include "HTMLNames.h" 30 #include "HTMLViewSourceDocument.h" 31 #include "SegmentedString.h" 32 #include "Text.h" 33 #include "XMLTokenizer.h" 34 35 using namespace std; 36 37 namespace WebCore { 38 39 using namespace HTMLNames; 40 41 class TextTokenizer : public Tokenizer { 42 public: 43 TextTokenizer(Document*); 44 virtual ~TextTokenizer(); 45 TextTokenizer(HTMLViewSourceDocument*); 46 47 virtual void write(const SegmentedString&, bool appendData); 48 virtual void finish(); 49 virtual bool isWaitingForScripts() const; 50 51 inline void checkBuffer(int len = 10) 52 { 53 if ((m_dest - m_buffer) > m_size - len) { 54 // Enlarge buffer 55 int newSize = std::max(m_size * 2, m_size + len); 56 int oldOffset = m_dest - m_buffer; 57 m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar))); 58 m_dest = m_buffer + oldOffset; 59 m_size = newSize; 60 } 61 } 62 63 private: 64 Document* m_doc; 65 Element* m_preElement; 66 67 bool m_skipLF; 68 69 int m_size; 70 UChar* m_buffer; 71 UChar* m_dest; 72 }; 73 74 TextTokenizer::TextTokenizer(Document* doc) 75 : m_doc(doc) 76 , m_preElement(0) 77 , m_skipLF(false) 78 { 79 // Allocate buffer 80 m_size = 254; 81 m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size)); 82 m_dest = m_buffer; 83 } 84 85 TextTokenizer::TextTokenizer(HTMLViewSourceDocument* doc) 86 : Tokenizer(true) 87 , m_doc(doc) 88 , m_preElement(0) 89 , m_skipLF(false) 90 { 91 // Allocate buffer 92 m_size = 254; 93 m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size)); 94 m_dest = m_buffer; 95 } 96 97 TextTokenizer::~TextTokenizer() 98 { 99 // finish() should have been called to prevent any leaks 100 ASSERT(!m_buffer); 101 } 102 103 void TextTokenizer::write(const SegmentedString& s, bool) 104 { 105 ExceptionCode ec; 106 107 m_dest = m_buffer; 108 109 SegmentedString str = s; 110 while (!str.isEmpty()) { 111 UChar c = *str; 112 113 if (c == '\r') { 114 *m_dest++ = '\n'; 115 116 // possibly skip an LF in the case of an CRLF sequence 117 m_skipLF = true; 118 } else if (c == '\n') { 119 if (!m_skipLF) 120 *m_dest++ = c; 121 else 122 m_skipLF = false; 123 } else { 124 *m_dest++ = c; 125 m_skipLF = false; 126 } 127 128 str.advance(); 129 130 // Maybe enlarge the buffer 131 checkBuffer(); 132 } 133 134 if (!m_preElement && !inViewSourceMode()) { 135 RefPtr<Element> rootElement = m_doc->createElement(htmlTag, false); 136 m_doc->appendChild(rootElement, ec); 137 138 RefPtr<Element> body = m_doc->createElement(bodyTag, false); 139 rootElement->appendChild(body, ec); 140 141 RefPtr<Element> preElement = m_doc->createElement(preTag, false); 142 preElement->setAttribute("style", "word-wrap: break-word; white-space: pre-wrap;", ec); 143 144 body->appendChild(preElement, ec); 145 146 m_preElement = preElement.get(); 147 } 148 149 String string = String(m_buffer, m_dest - m_buffer); 150 if (inViewSourceMode()) { 151 static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceText(string); 152 return; 153 } 154 155 unsigned charsLeft = string.length(); 156 while (charsLeft) { 157 // split large text to nodes of manageable size 158 RefPtr<Text> text = Text::createWithLengthLimit(m_doc, string, charsLeft); 159 m_preElement->appendChild(text, ec); 160 } 161 } 162 163 void TextTokenizer::finish() 164 { 165 if (!m_preElement) 166 write(SegmentedString(), true); // Create document structure for an empty text document. 167 m_preElement = 0; 168 fastFree(m_buffer); 169 m_buffer = 0; 170 m_dest = 0; 171 172 m_doc->finishedParsing(); 173 } 174 175 bool TextTokenizer::isWaitingForScripts() const 176 { 177 // A text document is never waiting for scripts 178 return false; 179 } 180 181 TextDocument::TextDocument(Frame* frame) 182 : HTMLDocument(frame) 183 { 184 } 185 186 Tokenizer* TextDocument::createTokenizer() 187 { 188 return new TextTokenizer(this); 189 } 190 191 Tokenizer* createTextTokenizer(HTMLViewSourceDocument* document) 192 { 193 return new TextTokenizer(document); 194 } 195 196 } 197