Home | History | Annotate | Download | only in dom
      1 /**
      2  * This file is part of the DOM implementation for KDE.
      3  *
      4  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      5  * Copyright (C) 2005, 2006 Apple Computer, Inc.
      6  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      7  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      8  * Copyright (C) 2007 The Android Open Source Project
      9  *
     10  * This library is free software; you can redistribute it and/or
     11  * modify it under the terms of the GNU Library General Public
     12  * License as published by the Free Software Foundation; either
     13  * version 2 of the License, or (at your option) any later version.
     14  *
     15  * This library is distributed in the hope that it will be useful,
     16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18  * Library General Public License for more details.
     19  *
     20  * You should have received a copy of the GNU Library General Public License
     21  * along with this library; see the file COPYING.LIB.  If not, write to
     22  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     23  * Boston, MA 02111-1307, USA.
     24  */
     25 
     26 #include "config.h"
     27 #include "XMLTokenizer.h"
     28 
     29 #include "CDATASection.h"
     30 #include "CachedScript.h"
     31 #include "Comment.h"
     32 #include "CString.h"
     33 #include "DocLoader.h"
     34 #include "Document.h"
     35 #include "DocumentFragment.h"
     36 #include "Frame.h"
     37 #include "FrameLoader.h"
     38 #include "FrameView.h"
     39 #include "HTMLNames.h"
     40 #include "HTMLScriptElement.h"
     41 #include "HTMLTableSectionElement.h"
     42 #include "HTMLTokenizer.h"
     43 #include "ProcessingInstruction.h"
     44 #include "EventNames.h"
     45 
     46 // strndup is not available everywhere, so here is a portable version <reed>
     47 static char* portable_strndup(const char src[], size_t len)
     48 {
     49     char* origDst = (char*)malloc(len + 1);
     50     if (NULL == origDst)
     51         return NULL;
     52 
     53     char* dst = origDst;
     54     while (len-- > 0) {
     55         if ((*dst++ = *src++) == 0)
     56             return origDst;
     57     }
     58     *dst = 0;
     59     return origDst;
     60 }
     61 
     62 namespace WebCore {
     63 
     64 using namespace EventNames;
     65 using namespace HTMLNames;
     66 
     67 const int maxErrors = 25;
     68 
     69 class PendingCallbacks {
     70 public:
     71     PendingCallbacks() {
     72         m_callbacks.setAutoDelete(true);
     73     }
     74 
     75     void appendStartElementNSCallback(const XML_Char* name, const XML_Char** atts) {
     76         PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
     77 
     78         callback->name = strdup(name);
     79         callback->count = 0;
     80         while (atts[callback->count])
     81             callback->count++;
     82         callback->atts = (XML_Char**)malloc(sizeof(XML_Char*) * (callback->count+1));
     83         for (int i=0; i<callback->count; i++)
     84             callback->atts[i] = strdup(atts[i]);
     85         callback->atts[callback->count] = NULL;
     86 
     87         m_callbacks.append(callback);
     88     }
     89 
     90     void appendEndElementNSCallback() {
     91         PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
     92 
     93         m_callbacks.append(callback);
     94     }
     95 
     96     void appendCharactersCallback(const XML_Char* s, int len) {
     97         PendingCharactersCallback* callback = new PendingCharactersCallback;
     98 
     99         callback->s = portable_strndup(s, len);
    100         callback->len = len;
    101 
    102         m_callbacks.append(callback);
    103     }
    104 
    105     void appendProcessingInstructionCallback(const XML_Char* target, const XML_Char* data) {
    106         PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
    107 
    108         callback->target = strdup(target);
    109         callback->data = strdup(data);
    110 
    111         m_callbacks.append(callback);
    112     }
    113 
    114     void appendStartCDATABlockCallback() {
    115         PendingStartCDATABlockCallback* callback = new PendingStartCDATABlockCallback;
    116 
    117         m_callbacks.append(callback);
    118     }
    119 
    120     void appendEndCDATABlockCallback() {
    121         PendingEndCDATABlockCallback* callback = new PendingEndCDATABlockCallback;
    122 
    123         m_callbacks.append(callback);
    124     }
    125 
    126     void appendCommentCallback(const XML_Char* s) {
    127         PendingCommentCallback* callback = new PendingCommentCallback;
    128 
    129         callback->s = strdup(s);
    130 
    131         m_callbacks.append(callback);
    132     }
    133 
    134     void appendErrorCallback(XMLTokenizer::ErrorType type, const char* message, int lineNumber, int columnNumber) {
    135         PendingErrorCallback* callback = new PendingErrorCallback;
    136 
    137         callback->message = strdup(message);
    138         callback->type = type;
    139         callback->lineNumber = lineNumber;
    140         callback->columnNumber = columnNumber;
    141 
    142         m_callbacks.append(callback);
    143     }
    144 
    145     void callAndRemoveFirstCallback(XMLTokenizer* tokenizer) {
    146         PendingCallback* cb = m_callbacks.getFirst();
    147 
    148         cb->call(tokenizer);
    149         m_callbacks.removeFirst();
    150     }
    151 
    152     bool isEmpty() const { return m_callbacks.isEmpty(); }
    153 
    154 private:
    155     struct PendingCallback {
    156 
    157         virtual ~PendingCallback() { }
    158 
    159         virtual void call(XMLTokenizer* tokenizer) = 0;
    160     };
    161 
    162     struct PendingStartElementNSCallback : public PendingCallback {
    163         virtual ~PendingStartElementNSCallback() {
    164             free(name);
    165             for (int i=0; i<count; i++)
    166                 free(atts[i]);
    167             free(atts);
    168         }
    169 
    170         virtual void call(XMLTokenizer* tokenizer) {
    171             tokenizer->startElementNs(name, (const XML_Char**)(atts));
    172         }
    173 
    174         XML_Char* name;
    175         int count;
    176         XML_Char** atts;
    177     };
    178 
    179     struct PendingEndElementNSCallback : public PendingCallback {
    180         virtual void call(XMLTokenizer* tokenizer) {
    181             tokenizer->endElementNs();
    182         }
    183     };
    184 
    185     struct PendingCharactersCallback : public PendingCallback {
    186         virtual ~PendingCharactersCallback() {
    187             free(s);
    188         }
    189 
    190         virtual void call(XMLTokenizer* tokenizer) {
    191             tokenizer->characters(s, len);
    192         }
    193 
    194         XML_Char* s;
    195         int len;
    196     };
    197 
    198     struct PendingProcessingInstructionCallback : public PendingCallback {
    199         virtual ~PendingProcessingInstructionCallback() {
    200             free(target);
    201             free(data);
    202         }
    203 
    204         virtual void call(XMLTokenizer* tokenizer) {
    205             tokenizer->processingInstruction(target, data);
    206         }
    207 
    208         XML_Char* target;
    209         XML_Char* data;
    210     };
    211 
    212     struct PendingStartCDATABlockCallback : public PendingCallback {
    213         virtual void call(XMLTokenizer* tokenizer) {
    214             tokenizer->startCdata();
    215         }
    216     };
    217 
    218     struct PendingEndCDATABlockCallback : public PendingCallback {
    219         virtual void call(XMLTokenizer* tokenizer) {
    220             tokenizer->endCdata();
    221         }
    222     };
    223 
    224     struct PendingCommentCallback : public PendingCallback {
    225         virtual ~PendingCommentCallback() {
    226             free(s);
    227         }
    228 
    229         virtual void call(XMLTokenizer* tokenizer) {
    230             tokenizer->comment(s);
    231         }
    232 
    233         XML_Char* s;
    234     };
    235 
    236     struct PendingErrorCallback: public PendingCallback {
    237         virtual ~PendingErrorCallback() {
    238             free (message);
    239         }
    240 
    241         virtual void call(XMLTokenizer* tokenizer) {
    242             tokenizer->error(type, message, lineNumber, columnNumber);
    243         }
    244 
    245         XMLTokenizer::ErrorType type;
    246         char* message;
    247         int lineNumber;
    248         int columnNumber;
    249     };
    250 
    251 public:
    252     DeprecatedPtrList<PendingCallback> m_callbacks;
    253 };
    254 
    255 // --------------------------------
    256 
    257 XMLTokenizer::XMLTokenizer(Document *_doc, FrameView *_view)
    258     : m_doc(_doc)
    259     , m_view(_view)
    260     , m_parser(0)
    261     , m_currentNode(_doc)
    262     , m_currentNodeIsReferenced(false)
    263     , m_sawError(false)
    264     , m_sawXSLTransform(false)
    265     , m_sawFirstElement(false)
    266     , m_parserPaused(false)
    267     , m_requestingScript(false)
    268     , m_finishCalled(false)
    269     , m_errorCount(0)
    270     , m_pendingScript(0)
    271     , m_scriptStartLine(0)
    272     , m_parsingFragment(false)
    273     , m_pendingCallbacks(new PendingCallbacks)
    274 {
    275 }
    276 
    277 XMLTokenizer::XMLTokenizer(DocumentFragment *fragment, Element *parentElement)
    278     : m_doc(fragment->document())
    279     , m_view(0)
    280     , m_parser(0)
    281     , m_currentNode(fragment)
    282     , m_currentNodeIsReferenced(fragment)
    283     , m_sawError(false)
    284     , m_sawXSLTransform(false)
    285     , m_sawFirstElement(false)
    286     , m_parserPaused(false)
    287     , m_requestingScript(false)
    288     , m_finishCalled(false)
    289     , m_errorCount(0)
    290     , m_pendingScript(0)
    291     , m_scriptStartLine(0)
    292     , m_parsingFragment(true)
    293     , m_pendingCallbacks(new PendingCallbacks)
    294 {
    295     if (fragment)
    296         fragment->ref();
    297     if (m_doc)
    298         m_doc->ref();
    299 
    300     // Add namespaces based on the parent node
    301     Vector<Element*> elemStack;
    302     while (parentElement) {
    303         elemStack.append(parentElement);
    304 
    305         Node* n = parentElement->parentNode();
    306         if (!n || !n->isElementNode())
    307             break;
    308         parentElement = static_cast<Element*>(n);
    309     }
    310 
    311     if (elemStack.isEmpty())
    312         return;
    313 
    314     for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
    315         if (NamedAttrMap* attrs = element->attributes()) {
    316             for (unsigned i = 0; i < attrs->length(); i++) {
    317                 Attribute* attr = attrs->attributeItem(i);
    318                 if (attr->localName() == "xmlns")
    319                     m_defaultNamespaceURI = attr->value();
    320                 else if (attr->prefix() == "xmlns")
    321                     m_prefixToNamespaceMap.set(attr->localName(), attr->value());
    322             }
    323         }
    324     }
    325 }
    326 
    327 XMLTokenizer::~XMLTokenizer()
    328 {
    329     setCurrentNode(0);
    330     if (m_parsingFragment && m_doc)
    331         m_doc->deref();
    332     if (m_pendingScript)
    333         m_pendingScript->deref(this);
    334 }
    335 
    336 void XMLTokenizer::setCurrentNode(Node* n)
    337 {
    338     bool nodeNeedsReference = n && n != m_doc;
    339     if (nodeNeedsReference)
    340         n->ref();
    341     if (m_currentNodeIsReferenced)
    342         m_currentNode->deref();
    343     m_currentNode = n;
    344     m_currentNodeIsReferenced = nodeNeedsReference;
    345 }
    346 
    347 // use space instead of ':' as separator because ':' can be inside an uri
    348 const XML_Char tripletSep=' ';
    349 
    350 inline DeprecatedString toQString(const XML_Char* str, unsigned int len)
    351 {
    352     return DeprecatedString::fromUtf8(reinterpret_cast<const char *>(str), len);
    353 }
    354 
    355 inline DeprecatedString toQString(const XML_Char* str)
    356 {
    357     return DeprecatedString::fromUtf8(str ? reinterpret_cast<const char *>(str) : "");
    358 }
    359 
    360 // triplet is formatted as URI + sep + local_name + sep + prefix.
    361 static inline void splitTriplet(const XML_Char *name, String &uri, String &localname, String &prefix)
    362 {
    363     String string[3];
    364     int found = 0;
    365     const char *start = reinterpret_cast<const char *>(name);
    366 
    367     while(start && (found < 3)) {
    368         char *next = strchr(start, tripletSep);
    369         if (next) {
    370             string[found++] = toQString(start, (next-start));
    371             start = next+1;
    372         } else {
    373             string[found++] = toQString(start);
    374             break;
    375         }
    376     }
    377 
    378     switch(found) {
    379     case 1:
    380         localname = string[0];
    381         break;
    382     case 2:
    383         uri = string[0];
    384         localname = string[1];
    385         break;
    386     case 3:
    387         uri = string[0];
    388         localname = string[1];
    389         prefix = string[2];
    390         break;
    391     }
    392 }
    393 
    394 static inline void handleElementNamespaces(Element *newElement, const String &uri, const String &prefix, ExceptionCode &exceptioncode)
    395 {
    396     if (uri.isEmpty())
    397         return;
    398 
    399     String namespaceQName("xmlns");
    400     if(!prefix.isEmpty())
    401         namespaceQName += String(":")+ prefix;
    402     newElement->setAttributeNS(String("http://www.w3.org/2000/xmlns/"), namespaceQName, uri, exceptioncode);
    403 }
    404 
    405 static inline void handleElementAttributes(Element *newElement, const XML_Char **atts, ExceptionCode &exceptioncode)
    406 {
    407     for (int i = 0; atts[i]; i += 2) {
    408         String attrURI, attrLocalName, attrPrefix;
    409         splitTriplet(atts[i], attrURI, attrLocalName, attrPrefix);
    410         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + String(":") + attrLocalName;
    411         String attrValue = toQString(atts[i+1]);
    412         newElement->setAttributeNS(attrURI, attrQName, attrValue, exceptioncode);
    413         if (exceptioncode) // exception while setting attributes
    414             return;
    415     }
    416 }
    417 
    418 void XMLTokenizer::startElementNs(const XML_Char *name, const XML_Char **atts)
    419 {
    420     if (m_parserStopped)
    421         return;
    422 
    423     if (m_parserPaused) {
    424         m_pendingCallbacks->appendStartElementNSCallback(name, atts);
    425         return;
    426     }
    427 
    428     m_sawFirstElement = true;
    429 
    430     exitText();
    431 
    432     String uri, localName, prefix;
    433     splitTriplet(name, uri, localName, prefix);
    434     String qName = prefix.isEmpty() ? localName : prefix + ":" + localName;
    435 
    436     if (m_parsingFragment && uri.isEmpty()) {
    437         if (!prefix.isEmpty())
    438             uri = String(m_prefixToNamespaceMap.get(prefix.impl()));
    439         else
    440             uri = m_defaultNamespaceURI;
    441     }
    442 
    443     ExceptionCode ec = 0;
    444     RefPtr<Element> newElement = m_doc->createElementNS(uri, qName, ec);
    445     if (!newElement) {
    446         stopParsing();
    447         return;
    448     }
    449 
    450     handleElementNamespaces(newElement.get(), uri, prefix, ec);
    451     if (ec) {
    452         stopParsing();
    453         return;
    454     }
    455 
    456     handleElementAttributes(newElement.get(), atts, ec);
    457     if (ec) {
    458         stopParsing();
    459         return;
    460     }
    461 
    462     if (newElement->hasTagName(scriptTag))
    463         static_cast<HTMLScriptElement*>(newElement.get())->setCreatedByParser(true);
    464 
    465     if (newElement->hasTagName(HTMLNames::scriptTag))
    466         m_scriptStartLine = lineNumber();
    467 
    468     if (!m_currentNode->addChild(newElement.get())) {
    469         stopParsing();
    470         return;
    471     }
    472 
    473     setCurrentNode(newElement.get());
    474     if (m_view && !newElement->attached())
    475         newElement->attach();
    476 }
    477 
    478 void XMLTokenizer::endElementNs()
    479 {
    480     if (m_parserStopped)
    481         return;
    482 
    483     if (m_parserPaused) {
    484         m_pendingCallbacks->appendEndElementNSCallback();
    485         return;
    486     }
    487 
    488     exitText();
    489 
    490     Node* n = m_currentNode;
    491     RefPtr<Node> parent = n->parentNode();
    492     n->finishedParsing();
    493 
    494     // don't load external scripts for standalone documents (for now)
    495     if (n->isElementNode() && m_view && static_cast<Element*>(n)->hasTagName(scriptTag)) {
    496         ASSERT(!m_pendingScript);
    497 
    498         m_requestingScript = true;
    499 
    500         Element* scriptElement = static_cast<Element*>(n);
    501         String scriptHref;
    502 
    503         if (static_cast<Element*>(n)->hasTagName(scriptTag))
    504             scriptHref = scriptElement->getAttribute(srcAttr);
    505 
    506         if (!scriptHref.isEmpty()) {
    507             // we have a src attribute
    508             const AtomicString& charset = scriptElement->getAttribute(charsetAttr);
    509             if ((m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, charset))) {
    510                 m_scriptElement = scriptElement;
    511                 m_pendingScript->ref(this);
    512 
    513                 // m_pendingScript will be 0 if script was already loaded and ref() executed it
    514                 if (m_pendingScript)
    515                     pauseParsing();
    516             } else
    517                 m_scriptElement = 0;
    518 
    519         } else {
    520             String scriptCode = "";
    521             for (Node* child = scriptElement->firstChild(); child; child = child->nextSibling()) {
    522                 if (child->isTextNode() || child->nodeType() == Node::CDATA_SECTION_NODE)
    523                     scriptCode += static_cast<CharacterData*>(child)->data();
    524             }
    525             m_view->frame()->loader()->executeScript(m_doc->URL(), m_scriptStartLine - 1, scriptCode);
    526         }
    527 
    528         m_requestingScript = false;
    529     }
    530 
    531     setCurrentNode(parent.get());
    532 }
    533 
    534 void XMLTokenizer::characters(const XML_Char *s, int len)
    535 {
    536     if (m_parserStopped)
    537         return;
    538 
    539     if (m_parserPaused) {
    540         m_pendingCallbacks->appendCharactersCallback(s, len);
    541         return;
    542     }
    543 
    544     if (m_currentNode->isTextNode() || enterText()) {
    545         ExceptionCode ec = 0;
    546         static_cast<Text*>(m_currentNode)->appendData(toQString(s, len), ec);
    547     }
    548 }
    549 
    550 bool XMLTokenizer::enterText()
    551 {
    552     RefPtr<Node> newNode = new Text(m_doc, "");
    553     if (!m_currentNode->addChild(newNode.get()))
    554         return false;
    555     setCurrentNode(newNode.get());
    556     return true;
    557 }
    558 
    559 void XMLTokenizer::exitText()
    560 {
    561     if (m_parserStopped)
    562         return;
    563 
    564     if (!m_currentNode || !m_currentNode->isTextNode())
    565         return;
    566 
    567     if (m_view && m_currentNode && !m_currentNode->attached())
    568         m_currentNode->attach();
    569 
    570     // FIXME: What's the right thing to do if the parent is really 0?
    571     // Just leaving the current node set to the text node doesn't make much sense.
    572     if (Node* par = m_currentNode->parentNode())
    573         setCurrentNode(par);
    574 }
    575 
    576 void XMLTokenizer::processingInstruction(const XML_Char *target, const XML_Char *data)
    577 {
    578     if (m_parserStopped)
    579         return;
    580 
    581     if (m_parserPaused) {
    582         m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
    583         return;
    584     }
    585 
    586     exitText();
    587 
    588     // ### handle exceptions
    589     int exception = 0;
    590     RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction(
    591         toQString(target), toQString(data), exception);
    592     if (exception)
    593         return;
    594 
    595     if (!m_currentNode->addChild(pi.get()))
    596         return;
    597     if (m_view && !pi->attached())
    598         pi->attach();
    599 
    600     // don't load stylesheets for standalone documents
    601     if (m_doc->frame()) {
    602         m_sawXSLTransform = !m_sawFirstElement && !pi->checkStyleSheet();
    603         if (m_sawXSLTransform)
    604             stopParsing();
    605     }
    606 }
    607 
    608 void XMLTokenizer::comment(const XML_Char *s)
    609 {
    610     if (m_parserStopped)
    611         return;
    612 
    613     if (m_parserPaused) {
    614         m_pendingCallbacks->appendCommentCallback(s);
    615         return;
    616     }
    617 
    618     exitText();
    619 
    620     RefPtr<Node> newNode = m_doc->createComment(toQString(s));
    621     m_currentNode->addChild(newNode.get());
    622     if (m_view && !newNode->attached())
    623         newNode->attach();
    624 }
    625 
    626 void XMLTokenizer::startCdata()
    627 {
    628     if (m_parserStopped)
    629         return;
    630 
    631     if (m_parserPaused) {
    632         m_pendingCallbacks->appendStartCDATABlockCallback();
    633         return;
    634     }
    635 
    636     exitText();
    637 
    638     RefPtr<Node> newNode = new CDATASection(m_doc, "");
    639     if (!m_currentNode->addChild(newNode.get()))
    640         return;
    641     if (m_view && !newNode->attached())
    642         newNode->attach();
    643     setCurrentNode(newNode.get());
    644 }
    645 
    646 void XMLTokenizer::endCdata()
    647 {
    648     if (m_parserStopped)
    649         return;
    650 
    651     if (m_parserPaused) {
    652         m_pendingCallbacks->appendEndCDATABlockCallback();
    653         return;
    654     }
    655 
    656     if (m_currentNode->parentNode() != 0)
    657         setCurrentNode(m_currentNode->parentNode());
    658 }
    659 
    660 static void XMLCALL startElementHandler(void *userdata, const XML_Char *name, const XML_Char **atts)
    661 {
    662     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
    663     tokenizer->startElementNs(name, atts);
    664 }
    665 
    666 static void XMLCALL endElementHandler(void *userdata, const XML_Char *name)
    667 {
    668     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
    669     tokenizer->endElementNs();
    670 }
    671 
    672 static void charactersHandler(void *userdata, const XML_Char *s, int len)
    673 {
    674     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
    675     tokenizer->characters(s, len);
    676 }
    677 
    678 static void processingInstructionHandler(void *userdata, const XML_Char *target, const XML_Char *data)
    679 {
    680     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
    681     tokenizer->processingInstruction(target, data);
    682 }
    683 
    684 static void commentHandler(void *userdata, const XML_Char *comment)
    685 {
    686     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
    687     tokenizer->comment(comment);
    688 }
    689 
    690 static void startCdataHandler(void *userdata)
    691 {
    692     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
    693     tokenizer->startCdata();
    694 }
    695 
    696 static void endCdataHandler(void *userdata)
    697 {
    698     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
    699     tokenizer->endCdata();
    700 }
    701 
    702 static int unknownEncodingHandler(void *userdata, const XML_Char *name, XML_Encoding *info)
    703 {
    704     // Expat doesn't like latin1 so we have to build this map
    705     // to do conversion correctly.
    706     // FIXME: Create a wrapper for expat that looks like libxml.
    707     if (strcasecmp(name, "latin1") == 0)
    708     {
    709         for (int i=0; i<256; i++) {
    710             info->map[i] = i;
    711         }
    712         return XML_STATUS_OK;
    713     }
    714     return XML_STATUS_ERROR;
    715 }
    716 
    717 bool XMLTokenizer::write(const SegmentedString&s, bool /*appendData*/ )
    718 {
    719     String parseString = s.toString();
    720 
    721     if (m_parserStopped || m_sawXSLTransform)
    722         return false;
    723 
    724     if (m_parserPaused) {
    725         m_pendingSrc.append(s);
    726         return false;
    727     }
    728 
    729     if (!m_parser) {
    730         static const UChar BOM = 0xFEFF;
    731         static const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    732         m_parser = XML_ParserCreateNS(BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE", tripletSep);
    733         XML_SetUserData(m_parser, (void *)this);
    734         XML_SetReturnNSTriplet(m_parser, true);
    735 
    736         XML_SetStartElementHandler(m_parser, startElementHandler);
    737         XML_SetEndElementHandler(m_parser, endElementHandler);
    738         XML_SetCharacterDataHandler(m_parser, charactersHandler);
    739         XML_SetProcessingInstructionHandler(m_parser, processingInstructionHandler);
    740         XML_SetCommentHandler(m_parser, commentHandler);
    741         XML_SetStartCdataSectionHandler(m_parser, startCdataHandler);
    742         XML_SetEndCdataSectionHandler(m_parser, endCdataHandler);
    743         XML_SetUnknownEncodingHandler(m_parser, unknownEncodingHandler, NULL);
    744     }
    745 
    746     enum XML_Status result = XML_Parse(m_parser, (const char*)parseString.characters(), sizeof(UChar) * parseString.length(), false);
    747     if (result == XML_STATUS_ERROR) {
    748         reportError();
    749         return false;
    750     }
    751 
    752     return true;
    753 }
    754 
    755 void XMLTokenizer::end()
    756 {
    757     if (m_parser) {
    758         XML_Parse(m_parser, 0, 0, true);
    759         XML_ParserFree(m_parser);
    760         m_parser = 0;
    761     }
    762 
    763     if (m_sawError)
    764         insertErrorMessageBlock();
    765     else {
    766         exitText();
    767         m_doc->updateStyleSelector();
    768     }
    769 
    770     setCurrentNode(0);
    771     m_doc->finishedParsing();
    772 }
    773 
    774 void XMLTokenizer::finish()
    775 {
    776     if (m_parserPaused)
    777         m_finishCalled = true;
    778     else
    779         end();
    780 }
    781 
    782 void XMLTokenizer::reportError()
    783 {
    784     ErrorType type = nonFatal;
    785     enum XML_Error code = XML_GetErrorCode(m_parser);
    786     switch (code) {
    787         case XML_ERROR_NO_MEMORY:
    788             type = fatal;
    789             break;
    790         case XML_ERROR_FINISHED:
    791             type = warning;
    792             break;
    793         default:
    794             type = nonFatal;
    795     }
    796     error(type, XML_ErrorString(code), lineNumber(), columnNumber());
    797 }
    798 
    799 void XMLTokenizer::error(ErrorType type, const char* m, int lineNumber, int columnNumber)
    800 {
    801     if (type == fatal || m_errorCount < maxErrors) {
    802         switch (type) {
    803             case warning:
    804                 m_errorMessages += String::format("warning on line %d at column %d: %s", lineNumber, columnNumber, m);
    805                 break;
    806             case fatal:
    807             case nonFatal:
    808                 m_errorMessages += String::format("error on line %d at column %d: %s", lineNumber, columnNumber, m);
    809         }
    810         ++m_errorCount;
    811     }
    812 
    813     if (type != warning)
    814         m_sawError = true;
    815 
    816     if (type == fatal)
    817         stopParsing();
    818 }
    819 
    820 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages)
    821 {
    822     ExceptionCode ec = 0;
    823     RefPtr<Element> reportElement = doc->createElementNS(xhtmlNamespaceURI, "parsererror", ec);
    824     reportElement->setAttribute(styleAttr, "display:block; pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
    825 
    826     RefPtr<Element> h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
    827     reportElement->appendChild(h3.get(), ec);
    828     h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec);
    829 
    830     RefPtr<Element> fixed = doc->createElementNS(xhtmlNamespaceURI, "div", ec);
    831     reportElement->appendChild(fixed.get(), ec);
    832     fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px");
    833     fixed->appendChild(doc->createTextNode(errorMessages), ec);
    834 
    835     h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
    836     reportElement->appendChild(h3.get(), ec);
    837     h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec);
    838 
    839     return reportElement;
    840 }
    841 
    842 void XMLTokenizer::insertErrorMessageBlock()
    843 {
    844     // One or more errors occurred during parsing of the code. Display an error block to the user above
    845     // the normal content (the DOM tree is created manually and includes line/col info regarding
    846     // where the errors are located)
    847 
    848     // Create elements for display
    849     ExceptionCode ec = 0;
    850     Document* doc = m_doc;
    851     Node* documentElement = doc->documentElement();
    852     if (!documentElement) {
    853         RefPtr<Node> rootElement = doc->createElementNS(xhtmlNamespaceURI, "html", ec);
    854         doc->appendChild(rootElement, ec);
    855         RefPtr<Node> body = doc->createElementNS(xhtmlNamespaceURI, "body", ec);
    856         rootElement->appendChild(body, ec);
    857         documentElement = body.get();
    858     }
    859 
    860     RefPtr<Element> reportElement = createXHTMLParserErrorHeader(doc, m_errorMessages);
    861     documentElement->insertBefore(reportElement, documentElement->firstChild(), ec);
    862     doc->updateRendering();
    863 }
    864 
    865 void XMLTokenizer::notifyFinished(CachedResource *finishedObj)
    866 {
    867     ASSERT(m_pendingScript == finishedObj);
    868 
    869     String cachedScriptUrl = m_pendingScript->url();
    870     String scriptSource = m_pendingScript->script();
    871     bool errorOccurred = m_pendingScript->errorOccurred();
    872     m_pendingScript->deref(this);
    873     m_pendingScript = 0;
    874 
    875     RefPtr<Element> e = m_scriptElement;
    876     m_scriptElement = 0;
    877 
    878     if (errorOccurred)
    879         EventTargetNodeCast(e.get())->dispatchHTMLEvent(errorEvent, true, false);
    880     else {
    881         m_view->frame()->loader()->executeScript(cachedScriptUrl, 0, scriptSource);
    882         EventTargetNodeCast(e.get())->dispatchHTMLEvent(loadEvent, false, false);
    883     }
    884 
    885     m_scriptElement = 0;
    886 
    887     if (!m_requestingScript)
    888         resumeParsing();
    889 }
    890 
    891 bool XMLTokenizer::isWaitingForScripts() const
    892 {
    893     return m_pendingScript != 0;
    894 }
    895 
    896 Tokenizer *newXMLTokenizer(Document *d, FrameView *v)
    897 {
    898     return new XMLTokenizer(d, v);
    899 }
    900 
    901 int XMLTokenizer::lineNumber() const
    902 {
    903     return XML_GetCurrentLineNumber(m_parser);
    904 }
    905 
    906 int XMLTokenizer::columnNumber() const
    907 {
    908     return XML_GetCurrentColumnNumber(m_parser);
    909 }
    910 
    911 void XMLTokenizer::stopParsing()
    912 {
    913     Tokenizer::stopParsing();
    914     if (m_parser)
    915         XML_StopParser(m_parser, 0);
    916 }
    917 
    918 void XMLTokenizer::pauseParsing()
    919 {
    920     if (m_parsingFragment)
    921         return;
    922 
    923     m_parserPaused = true;
    924 }
    925 
    926 void XMLTokenizer::resumeParsing()
    927 {
    928     ASSERT(m_parserPaused);
    929 
    930     m_parserPaused = false;
    931 
    932     // First, execute any pending callbacks
    933     while (!m_pendingCallbacks->isEmpty()) {
    934         m_pendingCallbacks->callAndRemoveFirstCallback(this);
    935 
    936         // A callback paused the parser
    937         if (m_parserPaused)
    938             return;
    939     }
    940 
    941     // Then, write any pending data
    942     SegmentedString rest = m_pendingSrc;
    943     m_pendingSrc.clear();
    944     write(rest, false);
    945 
    946     // Finally, if finish() has been called and write() didn't result
    947     // in any further callbacks being queued, call end()
    948     if (m_finishCalled && m_pendingCallbacks->isEmpty())
    949         end();
    950 }
    951 
    952 // --------------------------------
    953 
    954 bool parseXMLDocumentFragment(const String &string, DocumentFragment *fragment, Element *parent)
    955 {
    956     XMLTokenizer tokenizer(fragment, parent);
    957 
    958     XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
    959     tokenizer.setXMLParser(parser);
    960 
    961     XML_SetUserData(parser, (void *)&tokenizer);
    962     XML_SetReturnNSTriplet(parser, true);
    963 
    964     XML_SetStartElementHandler(parser, startElementHandler);
    965     XML_SetEndElementHandler(parser, endElementHandler);
    966     XML_SetCharacterDataHandler(parser, charactersHandler);
    967     XML_SetProcessingInstructionHandler(parser, processingInstructionHandler);
    968     XML_SetCommentHandler(parser, commentHandler);
    969     XML_SetStartCdataSectionHandler(parser, startCdataHandler);
    970     XML_SetEndCdataSectionHandler(parser, endCdataHandler);
    971 
    972     CString cString = string.utf8();
    973     int result = XML_Parse(parser, cString.data(), cString.length(), true);
    974 
    975     XML_ParserFree(parser);
    976     tokenizer.setXMLParser(0);
    977 
    978     return result != XML_STATUS_ERROR;
    979 }
    980 
    981 // --------------------------------
    982 
    983 struct AttributeParseState {
    984     HashMap<String, String> attributes;
    985     bool gotAttributes;
    986 };
    987 
    988 static void attributesStartElementHandler(void *userData, const XML_Char *name, const XML_Char **atts)
    989 {
    990     if (strcmp(name, "attrs") != 0)
    991         return;
    992 
    993     if (atts[0] == 0 )
    994         return;
    995 
    996     AttributeParseState *state = static_cast<AttributeParseState *>(userData);
    997     state->gotAttributes = true;
    998 
    999     for (int i = 0; atts[i]; i += 2) {
   1000         DeprecatedString attrName = toQString(atts[i]);
   1001         DeprecatedString attrValue = toQString(atts[i+1]);
   1002         state->attributes.set(attrName, attrValue);
   1003     }
   1004 }
   1005 
   1006 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
   1007 {
   1008     AttributeParseState state;
   1009     state.gotAttributes = false;
   1010 
   1011     XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
   1012     XML_SetUserData(parser, (void *)&state);
   1013     XML_SetReturnNSTriplet(parser, true);
   1014 
   1015     XML_SetStartElementHandler(parser, attributesStartElementHandler);
   1016     String input = "<?xml version=\"1.0\"?><attrs " + string.deprecatedString() + " />";
   1017     CString cString = input.deprecatedString().utf8();
   1018     if ( XML_Parse(parser, cString.data(), cString.length(), true) != XML_STATUS_ERROR )
   1019         attrsOK = state.gotAttributes;
   1020     XML_ParserFree(parser);
   1021 
   1022     return state.attributes;
   1023 }
   1024 
   1025 }
   1026