WebCore/loader/TextDocument.cpp

/*
 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "TextDocument.h"

#include "Element.h"
#include "HTMLNames.h"
#include "HTMLViewSourceDocument.h"
#include "SegmentedString.h"
#include "Text.h"
#include "XMLTokenizer.h"

using namespace std;

namespace WebCore {

using namespace HTMLNames;

class TextTokenizer : public Tokenizer {
public:
    TextTokenizer(Document*);
    virtual ~TextTokenizer();
    TextTokenizer(HTMLViewSourceDocument*);

    virtual void write(const SegmentedString&, bool appendData);
    virtual void finish();
    virtual bool isWaitingForScripts() const;

    inline void checkBuffer(int len = 10)
    {
        if ((m_dest - m_buffer) > m_size - len) {
            // Enlarge buffer
            int newSize = std::max(m_size * 2, m_size + len);
            int oldOffset = m_dest - m_buffer;
            m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
            m_dest = m_buffer + oldOffset;
            m_size = newSize;
        }
    }

private:
    Document* m_doc;
    Element* m_preElement;

    bool m_skipLF;

    int m_size;
    UChar* m_buffer;
    UChar* m_dest;
};

TextTokenizer::TextTokenizer(Document* doc)
    : m_doc(doc)
    , m_preElement(0)
    , m_skipLF(false)
{
    // Allocate buffer
    m_size = 254;
    m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
    m_dest = m_buffer;
}

TextTokenizer::TextTokenizer(HTMLViewSourceDocument* doc)
    : Tokenizer(true)
    , m_doc(doc)
    , m_preElement(0)
    , m_skipLF(false)
{
    // Allocate buffer
    m_size = 254;
    m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_size));
    m_dest = m_buffer;
}

TextTokenizer::~TextTokenizer()
{
    // finish() should have been called to prevent any leaks
    ASSERT(!m_buffer);
}

void TextTokenizer::write(const SegmentedString& s, bool)
{
    ExceptionCode ec;

    m_dest = m_buffer;

    SegmentedString str = s;
    while (!str.isEmpty()) {
        UChar c = *str;

        if (c == '\r') {
            *m_dest++ = '\n';

            // possibly skip an LF in the case of an CRLF sequence
            m_skipLF = true;
        } else if (c == '\n') {
            if (!m_skipLF)
                *m_dest++ = c;
            else
                m_skipLF = false;
        } else {
            *m_dest++ = c;
            m_skipLF = false;
        }

        str.advance();

        // Maybe enlarge the buffer
        checkBuffer();
    }

    if (!m_preElement && !inViewSourceMode()) {
        RefPtr<Element> rootElement = m_doc->createElement(htmlTag, false);
        m_doc->appendChild(rootElement, ec);

        RefPtr<Element> body = m_doc->createElement(bodyTag, false);
        rootElement->appendChild(body, ec);

        RefPtr<Element> preElement = m_doc->createElement(preTag, false);
        preElement->setAttribute("style", "word-wrap: break-word; white-space: pre-wrap;", ec);

        body->appendChild(preElement, ec);

        m_preElement = preElement.get();
    }

    String string = String(m_buffer, m_dest - m_buffer);
    if (inViewSourceMode()) {
        static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceText(string);
        return;
    }

    unsigned charsLeft = string.length();
    while (charsLeft) {
        // split large text to nodes of manageable size
        RefPtr<Text> text = Text::createWithLengthLimit(m_doc, string, charsLeft);
        m_preElement->appendChild(text, ec);
    }
}

void TextTokenizer::finish()
{
    if (!m_preElement)
        write(SegmentedString(), true); // Create document structure for an empty text document.
    m_preElement = 0;
    fastFree(m_buffer);
    m_buffer = 0;
    m_dest = 0;

    m_doc->finishedParsing();
}

bool TextTokenizer::isWaitingForScripts() const
{
    // A text document is never waiting for scripts
    return false;
}

TextDocument::TextDocument(Frame* frame)
    : HTMLDocument(frame)
{
}

Tokenizer* TextDocument::createTokenizer()
{
    return new TextTokenizer(this);
}

Tokenizer* createTextTokenizer(HTMLViewSourceDocument* document)
{
    return new TextTokenizer(document);
}

}