Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
      3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
      4  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/html/parser/HTMLTokenizer.h"
     30 
     31 #include "core/HTMLNames.h"
     32 #include "core/HTMLTokenizerNames.h"
     33 #include "core/html/parser/HTMLEntityParser.h"
     34 #include "core/html/parser/HTMLParserIdioms.h"
     35 #include "core/html/parser/HTMLTreeBuilder.h"
     36 #include "platform/NotImplemented.h"
     37 #include "core/xml/parser/MarkupTokenizerInlines.h"
     38 #include "wtf/ASCIICType.h"
     39 #include "wtf/text/AtomicString.h"
     40 #include "wtf/unicode/Unicode.h"
     41 
     42 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used
     43 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe.
     44 #undef DEFINE_STATIC_LOCAL
     45 
     46 namespace blink {
     47 
     48 using namespace HTMLNames;
     49 
     50 // This has to go in a .cpp file, as the linker doesn't like it being included more than once.
     51 // We don't have an HTMLToken.cpp though, so this is the next best place.
     52 QualifiedName AtomicHTMLToken::nameForAttribute(const HTMLToken::Attribute& attribute) const
     53 {
     54     return QualifiedName(nullAtom, AtomicString(attribute.name), nullAtom);
     55 }
     56 
     57 bool AtomicHTMLToken::usesName() const
     58 {
     59     return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE;
     60 }
     61 
     62 bool AtomicHTMLToken::usesAttributes() const
     63 {
     64     return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
     65 }
     66 
     67 static inline UChar toLowerCase(UChar cc)
     68 {
     69     ASSERT(isASCIIUpper(cc));
     70     const int lowerCaseOffset = 0x20;
     71     return cc + lowerCaseOffset;
     72 }
     73 
     74 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string)
     75 {
     76     if (vector.size() != string.length())
     77         return false;
     78 
     79     if (!string.length())
     80         return true;
     81 
     82     return equal(string.impl(), vector.data(), vector.size());
     83 }
     84 
     85 static inline bool isEndTagBufferingState(HTMLTokenizer::State state)
     86 {
     87     switch (state) {
     88     case HTMLTokenizer::RCDATAEndTagOpenState:
     89     case HTMLTokenizer::RCDATAEndTagNameState:
     90     case HTMLTokenizer::RAWTEXTEndTagOpenState:
     91     case HTMLTokenizer::RAWTEXTEndTagNameState:
     92     case HTMLTokenizer::ScriptDataEndTagOpenState:
     93     case HTMLTokenizer::ScriptDataEndTagNameState:
     94     case HTMLTokenizer::ScriptDataEscapedEndTagOpenState:
     95     case HTMLTokenizer::ScriptDataEscapedEndTagNameState:
     96         return true;
     97     default:
     98         return false;
     99     }
    100 }
    101 
    102 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
    103 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
    104 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
    105 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
    106 
    107 HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
    108     : m_inputStreamPreprocessor(this)
    109     , m_options(options)
    110 {
    111     reset();
    112 }
    113 
    114 HTMLTokenizer::~HTMLTokenizer()
    115 {
    116 }
    117 
    118 void HTMLTokenizer::reset()
    119 {
    120     m_state = HTMLTokenizer::DataState;
    121     m_token = 0;
    122     m_forceNullCharacterReplacement = false;
    123     m_shouldAllowCDATA = false;
    124     m_additionalAllowedCharacter = '\0';
    125 }
    126 
    127 bool HTMLTokenizer::canCreateCheckpoint() const
    128 {
    129     if (!m_appropriateEndTagName.isEmpty())
    130         return false;
    131     if (!m_temporaryBuffer.isEmpty())
    132         return false;
    133     if (!m_bufferedEndTagName.isEmpty())
    134         return false;
    135     return true;
    136 }
    137 
    138 void HTMLTokenizer::createCheckpoint(Checkpoint& result) const
    139 {
    140     ASSERT(canCreateCheckpoint());
    141     result.options = m_options;
    142     result.state = m_state;
    143     result.additionalAllowedCharacter = m_additionalAllowedCharacter;
    144     result.skipNextNewLine = m_inputStreamPreprocessor.skipNextNewLine();
    145     result.shouldAllowCDATA = m_shouldAllowCDATA;
    146 }
    147 
    148 void HTMLTokenizer::restoreFromCheckpoint(const Checkpoint& checkpoint)
    149 {
    150     m_token = 0;
    151     m_options = checkpoint.options;
    152     m_state = checkpoint.state;
    153     m_additionalAllowedCharacter = checkpoint.additionalAllowedCharacter;
    154     m_inputStreamPreprocessor.reset(checkpoint.skipNextNewLine);
    155     m_shouldAllowCDATA = checkpoint.shouldAllowCDATA;
    156 }
    157 
    158 inline bool HTMLTokenizer::processEntity(SegmentedString& source)
    159 {
    160     bool notEnoughCharacters = false;
    161     DecodedHTMLEntity decodedEntity;
    162     bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
    163     if (notEnoughCharacters)
    164         return false;
    165     if (!success) {
    166         ASSERT(decodedEntity.isEmpty());
    167         bufferCharacter('&');
    168     } else {
    169         for (unsigned i = 0; i < decodedEntity.length; ++i)
    170             bufferCharacter(decodedEntity.data[i]);
    171     }
    172     return true;
    173 }
    174 
    175 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
    176 {
    177     ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
    178     source.advanceAndUpdateLineNumber();
    179     if (m_token->type() == HTMLToken::Character)
    180         return true;
    181     m_token->beginEndTag(m_bufferedEndTagName);
    182     m_bufferedEndTagName.clear();
    183     m_appropriateEndTagName.clear();
    184     m_temporaryBuffer.clear();
    185     return false;
    186 }
    187 
    188 #define FLUSH_AND_ADVANCE_TO(stateName)                                    \
    189     do {                                                                   \
    190         m_state = HTMLTokenizer::stateName;                           \
    191         if (flushBufferedEndTag(source))                                   \
    192             return true;                                                   \
    193         if (source.isEmpty()                                               \
    194             || !m_inputStreamPreprocessor.peek(source))                    \
    195             return haveBufferedCharacterToken();                           \
    196         cc = m_inputStreamPreprocessor.nextInputCharacter();               \
    197         goto stateName;                                                    \
    198     } while (false)
    199 
    200 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, HTMLTokenizer::State state)
    201 {
    202     m_state = state;
    203     flushBufferedEndTag(source);
    204     return true;
    205 }
    206 
    207 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
    208 {
    209     // If we have a token in progress, then we're supposed to be called back
    210     // with the same token so we can finish it.
    211     ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
    212     m_token = &token;
    213 
    214     if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
    215         // FIXME: This should call flushBufferedEndTag().
    216         // We started an end tag during our last iteration.
    217         m_token->beginEndTag(m_bufferedEndTagName);
    218         m_bufferedEndTagName.clear();
    219         m_appropriateEndTagName.clear();
    220         m_temporaryBuffer.clear();
    221         if (m_state == HTMLTokenizer::DataState) {
    222             // We're back in the data state, so we must be done with the tag.
    223             return true;
    224         }
    225     }
    226 
    227     if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
    228         return haveBufferedCharacterToken();
    229     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
    230 
    231     // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
    232     switch (m_state) {
    233     HTML_BEGIN_STATE(DataState) {
    234         if (cc == '&')
    235             HTML_ADVANCE_TO(CharacterReferenceInDataState);
    236         else if (cc == '<') {
    237             if (m_token->type() == HTMLToken::Character) {
    238                 // We have a bunch of character tokens queued up that we
    239                 // are emitting lazily here.
    240                 return true;
    241             }
    242             HTML_ADVANCE_TO(TagOpenState);
    243         } else if (cc == kEndOfFileMarker)
    244             return emitEndOfFile(source);
    245         else {
    246             bufferCharacter(cc);
    247             HTML_ADVANCE_TO(DataState);
    248         }
    249     }
    250     END_STATE()
    251 
    252     HTML_BEGIN_STATE(CharacterReferenceInDataState) {
    253         if (!processEntity(source))
    254             return haveBufferedCharacterToken();
    255         HTML_SWITCH_TO(DataState);
    256     }
    257     END_STATE()
    258 
    259     HTML_BEGIN_STATE(RCDATAState) {
    260         if (cc == '&')
    261             HTML_ADVANCE_TO(CharacterReferenceInRCDATAState);
    262         else if (cc == '<')
    263             HTML_ADVANCE_TO(RCDATALessThanSignState);
    264         else if (cc == kEndOfFileMarker)
    265             return emitEndOfFile(source);
    266         else {
    267             bufferCharacter(cc);
    268             HTML_ADVANCE_TO(RCDATAState);
    269         }
    270     }
    271     END_STATE()
    272 
    273     HTML_BEGIN_STATE(CharacterReferenceInRCDATAState) {
    274         if (!processEntity(source))
    275             return haveBufferedCharacterToken();
    276         HTML_SWITCH_TO(RCDATAState);
    277     }
    278     END_STATE()
    279 
    280     HTML_BEGIN_STATE(RAWTEXTState) {
    281         if (cc == '<')
    282             HTML_ADVANCE_TO(RAWTEXTLessThanSignState);
    283         else if (cc == kEndOfFileMarker)
    284             return emitEndOfFile(source);
    285         else {
    286             bufferCharacter(cc);
    287             HTML_ADVANCE_TO(RAWTEXTState);
    288         }
    289     }
    290     END_STATE()
    291 
    292     HTML_BEGIN_STATE(ScriptDataState) {
    293         if (cc == '<')
    294             HTML_ADVANCE_TO(ScriptDataLessThanSignState);
    295         else if (cc == kEndOfFileMarker)
    296             return emitEndOfFile(source);
    297         else {
    298             bufferCharacter(cc);
    299             HTML_ADVANCE_TO(ScriptDataState);
    300         }
    301     }
    302     END_STATE()
    303 
    304     HTML_BEGIN_STATE(PLAINTEXTState) {
    305         if (cc == kEndOfFileMarker)
    306             return emitEndOfFile(source);
    307         bufferCharacter(cc);
    308         HTML_ADVANCE_TO(PLAINTEXTState);
    309     }
    310     END_STATE()
    311 
    312     HTML_BEGIN_STATE(TagOpenState) {
    313         if (cc == '!')
    314             HTML_ADVANCE_TO(MarkupDeclarationOpenState);
    315         else if (cc == '/')
    316             HTML_ADVANCE_TO(EndTagOpenState);
    317         else if (isASCIIUpper(cc)) {
    318             m_token->beginStartTag(toLowerCase(cc));
    319             HTML_ADVANCE_TO(TagNameState);
    320         } else if (isASCIILower(cc)) {
    321             m_token->beginStartTag(cc);
    322             HTML_ADVANCE_TO(TagNameState);
    323         } else if (cc == '?') {
    324             parseError();
    325             // The spec consumes the current character before switching
    326             // to the bogus comment state, but it's easier to implement
    327             // if we reconsume the current character.
    328             HTML_RECONSUME_IN(BogusCommentState);
    329         } else {
    330             parseError();
    331             bufferCharacter('<');
    332             HTML_RECONSUME_IN(DataState);
    333         }
    334     }
    335     END_STATE()
    336 
    337     HTML_BEGIN_STATE(EndTagOpenState) {
    338         if (isASCIIUpper(cc)) {
    339             m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
    340             m_appropriateEndTagName.clear();
    341             HTML_ADVANCE_TO(TagNameState);
    342         } else if (isASCIILower(cc)) {
    343             m_token->beginEndTag(static_cast<LChar>(cc));
    344             m_appropriateEndTagName.clear();
    345             HTML_ADVANCE_TO(TagNameState);
    346         } else if (cc == '>') {
    347             parseError();
    348             HTML_ADVANCE_TO(DataState);
    349         } else if (cc == kEndOfFileMarker) {
    350             parseError();
    351             bufferCharacter('<');
    352             bufferCharacter('/');
    353             HTML_RECONSUME_IN(DataState);
    354         } else {
    355             parseError();
    356             HTML_RECONSUME_IN(BogusCommentState);
    357         }
    358     }
    359     END_STATE()
    360 
    361     HTML_BEGIN_STATE(TagNameState) {
    362         if (isTokenizerWhitespace(cc))
    363             HTML_ADVANCE_TO(BeforeAttributeNameState);
    364         else if (cc == '/')
    365             HTML_ADVANCE_TO(SelfClosingStartTagState);
    366         else if (cc == '>')
    367             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    368         else if (isASCIIUpper(cc)) {
    369             m_token->appendToName(toLowerCase(cc));
    370             HTML_ADVANCE_TO(TagNameState);
    371         } else if (cc == kEndOfFileMarker) {
    372             parseError();
    373             HTML_RECONSUME_IN(DataState);
    374         } else {
    375             m_token->appendToName(cc);
    376             HTML_ADVANCE_TO(TagNameState);
    377         }
    378     }
    379     END_STATE()
    380 
    381     HTML_BEGIN_STATE(RCDATALessThanSignState) {
    382         if (cc == '/') {
    383             m_temporaryBuffer.clear();
    384             ASSERT(m_bufferedEndTagName.isEmpty());
    385             HTML_ADVANCE_TO(RCDATAEndTagOpenState);
    386         } else {
    387             bufferCharacter('<');
    388             HTML_RECONSUME_IN(RCDATAState);
    389         }
    390     }
    391     END_STATE()
    392 
    393     HTML_BEGIN_STATE(RCDATAEndTagOpenState) {
    394         if (isASCIIUpper(cc)) {
    395             m_temporaryBuffer.append(static_cast<LChar>(cc));
    396             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    397             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    398         } else if (isASCIILower(cc)) {
    399             m_temporaryBuffer.append(static_cast<LChar>(cc));
    400             addToPossibleEndTag(static_cast<LChar>(cc));
    401             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    402         } else {
    403             bufferCharacter('<');
    404             bufferCharacter('/');
    405             HTML_RECONSUME_IN(RCDATAState);
    406         }
    407     }
    408     END_STATE()
    409 
    410     HTML_BEGIN_STATE(RCDATAEndTagNameState) {
    411         if (isASCIIUpper(cc)) {
    412             m_temporaryBuffer.append(static_cast<LChar>(cc));
    413             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    414             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    415         } else if (isASCIILower(cc)) {
    416             m_temporaryBuffer.append(static_cast<LChar>(cc));
    417             addToPossibleEndTag(static_cast<LChar>(cc));
    418             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    419         } else {
    420             if (isTokenizerWhitespace(cc)) {
    421                 if (isAppropriateEndTag()) {
    422                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    423                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    424                 }
    425             } else if (cc == '/') {
    426                 if (isAppropriateEndTag()) {
    427                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    428                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    429                 }
    430             } else if (cc == '>') {
    431                 if (isAppropriateEndTag()) {
    432                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    433                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    434                 }
    435             }
    436             bufferCharacter('<');
    437             bufferCharacter('/');
    438             m_token->appendToCharacter(m_temporaryBuffer);
    439             m_bufferedEndTagName.clear();
    440             m_temporaryBuffer.clear();
    441             HTML_RECONSUME_IN(RCDATAState);
    442         }
    443     }
    444     END_STATE()
    445 
    446     HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {
    447         if (cc == '/') {
    448             m_temporaryBuffer.clear();
    449             ASSERT(m_bufferedEndTagName.isEmpty());
    450             HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);
    451         } else {
    452             bufferCharacter('<');
    453             HTML_RECONSUME_IN(RAWTEXTState);
    454         }
    455     }
    456     END_STATE()
    457 
    458     HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {
    459         if (isASCIIUpper(cc)) {
    460             m_temporaryBuffer.append(static_cast<LChar>(cc));
    461             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    462             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    463         } else if (isASCIILower(cc)) {
    464             m_temporaryBuffer.append(static_cast<LChar>(cc));
    465             addToPossibleEndTag(static_cast<LChar>(cc));
    466             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    467         } else {
    468             bufferCharacter('<');
    469             bufferCharacter('/');
    470             HTML_RECONSUME_IN(RAWTEXTState);
    471         }
    472     }
    473     END_STATE()
    474 
    475     HTML_BEGIN_STATE(RAWTEXTEndTagNameState) {
    476         if (isASCIIUpper(cc)) {
    477             m_temporaryBuffer.append(static_cast<LChar>(cc));
    478             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    479             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    480         } else if (isASCIILower(cc)) {
    481             m_temporaryBuffer.append(static_cast<LChar>(cc));
    482             addToPossibleEndTag(static_cast<LChar>(cc));
    483             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    484         } else {
    485             if (isTokenizerWhitespace(cc)) {
    486                 if (isAppropriateEndTag()) {
    487                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    488                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    489                 }
    490             } else if (cc == '/') {
    491                 if (isAppropriateEndTag()) {
    492                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    493                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    494                 }
    495             } else if (cc == '>') {
    496                 if (isAppropriateEndTag()) {
    497                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    498                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    499                 }
    500             }
    501             bufferCharacter('<');
    502             bufferCharacter('/');
    503             m_token->appendToCharacter(m_temporaryBuffer);
    504             m_bufferedEndTagName.clear();
    505             m_temporaryBuffer.clear();
    506             HTML_RECONSUME_IN(RAWTEXTState);
    507         }
    508     }
    509     END_STATE()
    510 
    511     HTML_BEGIN_STATE(ScriptDataLessThanSignState) {
    512         if (cc == '/') {
    513             m_temporaryBuffer.clear();
    514             ASSERT(m_bufferedEndTagName.isEmpty());
    515             HTML_ADVANCE_TO(ScriptDataEndTagOpenState);
    516         } else if (cc == '!') {
    517             bufferCharacter('<');
    518             bufferCharacter('!');
    519             HTML_ADVANCE_TO(ScriptDataEscapeStartState);
    520         } else {
    521             bufferCharacter('<');
    522             HTML_RECONSUME_IN(ScriptDataState);
    523         }
    524     }
    525     END_STATE()
    526 
    527     HTML_BEGIN_STATE(ScriptDataEndTagOpenState) {
    528         if (isASCIIUpper(cc)) {
    529             m_temporaryBuffer.append(static_cast<LChar>(cc));
    530             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    531             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    532         } else if (isASCIILower(cc)) {
    533             m_temporaryBuffer.append(static_cast<LChar>(cc));
    534             addToPossibleEndTag(static_cast<LChar>(cc));
    535             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    536         } else {
    537             bufferCharacter('<');
    538             bufferCharacter('/');
    539             HTML_RECONSUME_IN(ScriptDataState);
    540         }
    541     }
    542     END_STATE()
    543 
    544     HTML_BEGIN_STATE(ScriptDataEndTagNameState) {
    545         if (isASCIIUpper(cc)) {
    546             m_temporaryBuffer.append(static_cast<LChar>(cc));
    547             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    548             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    549         } else if (isASCIILower(cc)) {
    550             m_temporaryBuffer.append(static_cast<LChar>(cc));
    551             addToPossibleEndTag(static_cast<LChar>(cc));
    552             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    553         } else {
    554             if (isTokenizerWhitespace(cc)) {
    555                 if (isAppropriateEndTag()) {
    556                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    557                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    558                 }
    559             } else if (cc == '/') {
    560                 if (isAppropriateEndTag()) {
    561                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    562                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    563                 }
    564             } else if (cc == '>') {
    565                 if (isAppropriateEndTag()) {
    566                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    567                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    568                 }
    569             }
    570             bufferCharacter('<');
    571             bufferCharacter('/');
    572             m_token->appendToCharacter(m_temporaryBuffer);
    573             m_bufferedEndTagName.clear();
    574             m_temporaryBuffer.clear();
    575             HTML_RECONSUME_IN(ScriptDataState);
    576         }
    577     }
    578     END_STATE()
    579 
    580     HTML_BEGIN_STATE(ScriptDataEscapeStartState) {
    581         if (cc == '-') {
    582             bufferCharacter(cc);
    583             HTML_ADVANCE_TO(ScriptDataEscapeStartDashState);
    584         } else
    585             HTML_RECONSUME_IN(ScriptDataState);
    586     }
    587     END_STATE()
    588 
    589     HTML_BEGIN_STATE(ScriptDataEscapeStartDashState) {
    590         if (cc == '-') {
    591             bufferCharacter(cc);
    592             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    593         } else
    594             HTML_RECONSUME_IN(ScriptDataState);
    595     }
    596     END_STATE()
    597 
    598     HTML_BEGIN_STATE(ScriptDataEscapedState) {
    599         if (cc == '-') {
    600             bufferCharacter(cc);
    601             HTML_ADVANCE_TO(ScriptDataEscapedDashState);
    602         } else if (cc == '<')
    603             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    604         else if (cc == kEndOfFileMarker) {
    605             parseError();
    606             HTML_RECONSUME_IN(DataState);
    607         } else {
    608             bufferCharacter(cc);
    609             HTML_ADVANCE_TO(ScriptDataEscapedState);
    610         }
    611     }
    612     END_STATE()
    613 
    614     HTML_BEGIN_STATE(ScriptDataEscapedDashState) {
    615         if (cc == '-') {
    616             bufferCharacter(cc);
    617             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    618         } else if (cc == '<')
    619             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    620         else if (cc == kEndOfFileMarker) {
    621             parseError();
    622             HTML_RECONSUME_IN(DataState);
    623         } else {
    624             bufferCharacter(cc);
    625             HTML_ADVANCE_TO(ScriptDataEscapedState);
    626         }
    627     }
    628     END_STATE()
    629 
    630     HTML_BEGIN_STATE(ScriptDataEscapedDashDashState) {
    631         if (cc == '-') {
    632             bufferCharacter(cc);
    633             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    634         } else if (cc == '<')
    635             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    636         else if (cc == '>') {
    637             bufferCharacter(cc);
    638             HTML_ADVANCE_TO(ScriptDataState);
    639         } else if (cc == kEndOfFileMarker) {
    640             parseError();
    641             HTML_RECONSUME_IN(DataState);
    642         } else {
    643             bufferCharacter(cc);
    644             HTML_ADVANCE_TO(ScriptDataEscapedState);
    645         }
    646     }
    647     END_STATE()
    648 
    649     HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
    650         if (cc == '/') {
    651             m_temporaryBuffer.clear();
    652             ASSERT(m_bufferedEndTagName.isEmpty());
    653             HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
    654         } else if (isASCIIUpper(cc)) {
    655             bufferCharacter('<');
    656             bufferCharacter(cc);
    657             m_temporaryBuffer.clear();
    658             m_temporaryBuffer.append(toLowerCase(cc));
    659             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    660         } else if (isASCIILower(cc)) {
    661             bufferCharacter('<');
    662             bufferCharacter(cc);
    663             m_temporaryBuffer.clear();
    664             m_temporaryBuffer.append(static_cast<LChar>(cc));
    665             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    666         } else {
    667             bufferCharacter('<');
    668             HTML_RECONSUME_IN(ScriptDataEscapedState);
    669         }
    670     }
    671     END_STATE()
    672 
    673     HTML_BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
    674         if (isASCIIUpper(cc)) {
    675             m_temporaryBuffer.append(static_cast<LChar>(cc));
    676             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    677             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    678         } else if (isASCIILower(cc)) {
    679             m_temporaryBuffer.append(static_cast<LChar>(cc));
    680             addToPossibleEndTag(static_cast<LChar>(cc));
    681             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    682         } else {
    683             bufferCharacter('<');
    684             bufferCharacter('/');
    685             HTML_RECONSUME_IN(ScriptDataEscapedState);
    686         }
    687     }
    688     END_STATE()
    689 
    690     HTML_BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
    691         if (isASCIIUpper(cc)) {
    692             m_temporaryBuffer.append(static_cast<LChar>(cc));
    693             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    694             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    695         } else if (isASCIILower(cc)) {
    696             m_temporaryBuffer.append(static_cast<LChar>(cc));
    697             addToPossibleEndTag(static_cast<LChar>(cc));
    698             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    699         } else {
    700             if (isTokenizerWhitespace(cc)) {
    701                 if (isAppropriateEndTag()) {
    702                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    703                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    704                 }
    705             } else if (cc == '/') {
    706                 if (isAppropriateEndTag()) {
    707                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    708                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    709                 }
    710             } else if (cc == '>') {
    711                 if (isAppropriateEndTag()) {
    712                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    713                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    714                 }
    715             }
    716             bufferCharacter('<');
    717             bufferCharacter('/');
    718             m_token->appendToCharacter(m_temporaryBuffer);
    719             m_bufferedEndTagName.clear();
    720             m_temporaryBuffer.clear();
    721             HTML_RECONSUME_IN(ScriptDataEscapedState);
    722         }
    723     }
    724     END_STATE()
    725 
    726     HTML_BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
                // Collects a lower-cased name in m_temporaryBuffer while every consumed
                // character is also passed to bufferCharacter() unchanged. On
                // whitespace, '/' or '>' the collected name is compared with
                // scriptTag.localName(): a match advances to
                // ScriptDataDoubleEscapedState, a mismatch to ScriptDataEscapedState.
                // Any other non-letter is reconsumed in ScriptDataEscapedState.
    727         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
    728             bufferCharacter(cc);
    729             if (temporaryBufferIs(scriptTag.localName()))
    730                 HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    731             else
    732                 HTML_ADVANCE_TO(ScriptDataEscapedState);
    733         } else if (isASCIIUpper(cc)) {
    734             bufferCharacter(cc);
    735             m_temporaryBuffer.append(toLowerCase(cc));
    736             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    737         } else if (isASCIILower(cc)) {
    738             bufferCharacter(cc);
    739             m_temporaryBuffer.append(static_cast<LChar>(cc));
    740             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    741         } else
    742             HTML_RECONSUME_IN(ScriptDataEscapedState);
    743     }
    744     END_STATE()
    745 
    746     HTML_BEGIN_STATE(ScriptDataDoubleEscapedState) {
                // Every character other than the end-of-file marker is buffered. '-'
                // and '<' additionally advance to the dash and less-than-sign states;
                // the end-of-file marker reports a parse error and is reconsumed in
                // DataState.
    747         if (cc == '-') {
    748             bufferCharacter(cc);
    749             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashState);
    750         } else if (cc == '<') {
    751             bufferCharacter(cc);
    752             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    753         } else if (cc == kEndOfFileMarker) {
    754             parseError();
    755             HTML_RECONSUME_IN(DataState);
    756         } else {
    757             bufferCharacter(cc);
    758             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    759         }
    760     }
    761     END_STATE()
    762 
    763     HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
                // Entered from ScriptDataDoubleEscapedState after a single '-'. A second
                // '-' advances to the dash-dash state and '<' to the less-than-sign
                // state; any other character (except end-of-file, which is a parse
                // error) is buffered and returns to ScriptDataDoubleEscapedState.
    764         if (cc == '-') {
    765             bufferCharacter(cc);
    766             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
    767         } else if (cc == '<') {
    768             bufferCharacter(cc);
    769             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    770         } else if (cc == kEndOfFileMarker) {
    771             parseError();
    772             HTML_RECONSUME_IN(DataState);
    773         } else {
    774             bufferCharacter(cc);
    775             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    776         }
    777     }
    778     END_STATE()
    779 
    780     HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
                // Entered after two consecutive '-'. Further '-' characters stay here,
                // '<' advances to the less-than-sign state and '>' advances to
                // ScriptDataState. The end-of-file marker is a parse error; anything
                // else is buffered and returns to ScriptDataDoubleEscapedState.
    781         if (cc == '-') {
    782             bufferCharacter(cc);
    783             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
    784         } else if (cc == '<') {
    785             bufferCharacter(cc);
    786             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    787         } else if (cc == '>') {
    788             bufferCharacter(cc);
    789             HTML_ADVANCE_TO(ScriptDataState);
    790         } else if (cc == kEndOfFileMarker) {
    791             parseError();
    792             HTML_RECONSUME_IN(DataState);
    793         } else {
    794             bufferCharacter(cc);
    795             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    796         }
    797     }
    798     END_STATE()
    799 
    800     HTML_BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
                // After '<' in double-escaped script data: a following '/' is buffered,
                // m_temporaryBuffer is reset so a fresh name can be collected, and the
                // tokenizer advances to ScriptDataDoubleEscapeEndState. Anything else is
                // reconsumed in ScriptDataDoubleEscapedState.
    801         if (cc == '/') {
    802             bufferCharacter(cc);
    803             m_temporaryBuffer.clear();
    804             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    805         } else
    806             HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
    807     }
    808     END_STATE()
    809 
    810     HTML_BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
                // Mirror image of ScriptDataDoubleEscapeStartState: a lower-cased name
                // is collected in m_temporaryBuffer while characters are buffered
                // unchanged. On whitespace, '/' or '>' a name equal to
                // scriptTag.localName() advances to ScriptDataEscapedState, otherwise to
                // ScriptDataDoubleEscapedState. Any other non-letter is reconsumed in
                // ScriptDataDoubleEscapedState.
    811         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
    812             bufferCharacter(cc);
    813             if (temporaryBufferIs(scriptTag.localName()))
    814                 HTML_ADVANCE_TO(ScriptDataEscapedState);
    815             else
    816                 HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    817         } else if (isASCIIUpper(cc)) {
    818             bufferCharacter(cc);
    819             m_temporaryBuffer.append(toLowerCase(cc));
    820             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    821         } else if (isASCIILower(cc)) {
    822             bufferCharacter(cc);
    823             m_temporaryBuffer.append(static_cast<LChar>(cc));
    824             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    825         } else
    826             HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
    827     }
    828     END_STATE()
    829 
    830     HTML_BEGIN_STATE(BeforeAttributeNameState) {
                // Skips whitespace before an attribute. '/' advances to
                // SelfClosingStartTagState, '>' emits the token and resumes in
                // DataState, and the end-of-file marker is a parse error reconsumed in
                // DataState. Any other character starts a new attribute whose name
                // begins with cc (ASCII upper-case letters are lower-cased first).
    831         if (isTokenizerWhitespace(cc))
    832             HTML_ADVANCE_TO(BeforeAttributeNameState);
    833         else if (cc == '/')
    834             HTML_ADVANCE_TO(SelfClosingStartTagState);
    835         else if (cc == '>')
    836             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    837         else if (isASCIIUpper(cc)) {
    838             m_token->addNewAttribute();
    839             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    840             m_token->appendToAttributeName(toLowerCase(cc));
    841             HTML_ADVANCE_TO(AttributeNameState);
    842         } else if (cc == kEndOfFileMarker) {
    843             parseError();
    844             HTML_RECONSUME_IN(DataState);
    845         } else {
                    // These characters are reported as errors but still become the
                    // first character of the attribute name.
    846             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
    847                 parseError();
    848             m_token->addNewAttribute();
    849             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    850             m_token->appendToAttributeName(cc);
    851             HTML_ADVANCE_TO(AttributeNameState);
    852         }
    853     }
    854     END_STATE()
    855 
    856     HTML_BEGIN_STATE(AttributeNameState) {
                // Accumulates the current attribute's name. Whitespace, '/', '=' and '>'
                // each close the name (endAttributeName) and then advance to
                // AfterAttributeNameState, SelfClosingStartTagState,
                // BeforeAttributeValueState, or emit the token respectively. ASCII
                // upper-case letters are appended in lower case. The end-of-file marker
                // is a parse error: the name is closed and cc is reconsumed in DataState.
    857         if (isTokenizerWhitespace(cc)) {
    858             m_token->endAttributeName(source.numberOfCharactersConsumed());
    859             HTML_ADVANCE_TO(AfterAttributeNameState);
    860         } else if (cc == '/') {
    861             m_token->endAttributeName(source.numberOfCharactersConsumed());
    862             HTML_ADVANCE_TO(SelfClosingStartTagState);
    863         } else if (cc == '=') {
    864             m_token->endAttributeName(source.numberOfCharactersConsumed());
    865             HTML_ADVANCE_TO(BeforeAttributeValueState);
    866         } else if (cc == '>') {
    867             m_token->endAttributeName(source.numberOfCharactersConsumed());
    868             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    869         } else if (isASCIIUpper(cc)) {
    870             m_token->appendToAttributeName(toLowerCase(cc));
    871             HTML_ADVANCE_TO(AttributeNameState);
    872         } else if (cc == kEndOfFileMarker) {
    873             parseError();
    874             m_token->endAttributeName(source.numberOfCharactersConsumed());
    875             HTML_RECONSUME_IN(DataState);
    876         } else {
                    // '=' is handled by an earlier branch of this chain, so it can
                    // never reach this point; only quotes and '<' are flagged here.
                    // The offending character is still appended to the name.
    877             if (cc == '"' || cc == '\'' || cc == '<')
    878                 parseError();
    879             m_token->appendToAttributeName(cc);
    880             HTML_ADVANCE_TO(AttributeNameState);
    881         }
    882     }
    883     END_STATE()
    884 
    885     HTML_BEGIN_STATE(AfterAttributeNameState) {
                // Handles what follows a completed attribute name. Whitespace is
                // skipped, '/' advances to SelfClosingStartTagState, '=' to
                // BeforeAttributeValueState, and '>' emits the token. The end-of-file
                // marker is a parse error reconsumed in DataState. Any other character
                // begins a new attribute (upper-case ASCII letters are lower-cased).
    886         if (isTokenizerWhitespace(cc))
    887             HTML_ADVANCE_TO(AfterAttributeNameState);
    888         else if (cc == '/')
    889             HTML_ADVANCE_TO(SelfClosingStartTagState);
    890         else if (cc == '=')
    891             HTML_ADVANCE_TO(BeforeAttributeValueState);
    892         else if (cc == '>')
    893             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    894         else if (isASCIIUpper(cc)) {
    895             m_token->addNewAttribute();
    896             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    897             m_token->appendToAttributeName(toLowerCase(cc));
    898             HTML_ADVANCE_TO(AttributeNameState);
    899         } else if (cc == kEndOfFileMarker) {
    900             parseError();
    901             HTML_RECONSUME_IN(DataState);
    902         } else {
                    // Quotes and '<' are reported as errors but still start the new
                    // attribute's name.
    903             if (cc == '"' || cc == '\'' || cc == '<')
    904                 parseError();
    905             m_token->addNewAttribute();
    906             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    907             m_token->appendToAttributeName(cc);
    908             HTML_ADVANCE_TO(AttributeNameState);
    909         }
    910     }
    911     END_STATE()
    912 
    913     HTML_BEGIN_STATE(BeforeAttributeValueState) {
                // Chooses how the attribute value will be read. Whitespace is skipped;
                // '"' and '\'' open the double- and single-quoted value states; '&' is
                // reconsumed in the unquoted state; '>' is a parse error that emits the
                // token; the end-of-file marker is a parse error reconsumed in
                // DataState. Anything else becomes the first character of an unquoted
                // value.
    914         if (isTokenizerWhitespace(cc))
    915             HTML_ADVANCE_TO(BeforeAttributeValueState);
    916         else if (cc == '"') {
                    // NOTE(review): the "+ 1" presumably skips the opening quote, which
                    // has not been consumed yet - confirm against SegmentedString.
    917             m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    918             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
    919         } else if (cc == '&') {
    920             m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    921             HTML_RECONSUME_IN(AttributeValueUnquotedState);
    922         } else if (cc == '\'') {
    923             m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    924             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
    925         } else if (cc == '>') {
    926             parseError();
    927             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    928         } else if (cc == kEndOfFileMarker) {
    929             parseError();
    930             HTML_RECONSUME_IN(DataState);
    931         } else {
                    // '<', '=' and '`' are flagged but still start the unquoted value.
    932             if (cc == '<' || cc == '=' || cc == '`')
    933                 parseError();
    934             m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    935             m_token->appendToAttributeValue(cc);
    936             HTML_ADVANCE_TO(AttributeValueUnquotedState);
    937         }
    938     }
    939     END_STATE()
    940 
    941     HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
                // Reads a double-quoted attribute value. The closing '"' ends the value;
                // '&' records '"' in m_additionalAllowedCharacter and hands over to
                // CharacterReferenceInAttributeValueState; the end-of-file marker is a
                // parse error that ends the value and is reconsumed in DataState. All
                // other characters are appended to the value.
    942         if (cc == '"') {
    943             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    944             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
    945         } else if (cc == '&') {
    946             m_additionalAllowedCharacter = '"';
    947             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    948         } else if (cc == kEndOfFileMarker) {
    949             parseError();
    950             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    951             HTML_RECONSUME_IN(DataState);
    952         } else {
    953             m_token->appendToAttributeValue(cc);
    954             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
    955         }
    956     }
    957     END_STATE()
    958 
    959     HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
                // Reads a single-quoted attribute value. The closing '\'' ends the
                // value; '&' records '\'' in m_additionalAllowedCharacter and hands over
                // to CharacterReferenceInAttributeValueState; the end-of-file marker is
                // a parse error that ends the value and is reconsumed in DataState. All
                // other characters are appended to the value.
    960         if (cc == '\'') {
    961             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    962             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
    963         } else if (cc == '&') {
    964             m_additionalAllowedCharacter = '\'';
    965             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    966         } else if (cc == kEndOfFileMarker) {
    967             parseError();
    968             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    969             HTML_RECONSUME_IN(DataState);
    970         } else {
    971             m_token->appendToAttributeValue(cc);
    972             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
    973         }
    974     }
    975     END_STATE()
    976 
    977     HTML_BEGIN_STATE(AttributeValueUnquotedState) {
                // Reads an unquoted attribute value. Whitespace ends the value and
                // advances to BeforeAttributeNameState; '>' ends the value and emits
                // the token; '&' records '>' in m_additionalAllowedCharacter and hands
                // over to CharacterReferenceInAttributeValueState; the end-of-file
                // marker is a parse error that ends the value. Everything else is
                // appended to the value.
    978         if (isTokenizerWhitespace(cc)) {
    979             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    980             HTML_ADVANCE_TO(BeforeAttributeNameState);
    981         } else if (cc == '&') {
    982             m_additionalAllowedCharacter = '>';
    983             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    984         } else if (cc == '>') {
    985             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    986             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    987         } else if (cc == kEndOfFileMarker) {
    988             parseError();
    989             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    990             HTML_RECONSUME_IN(DataState);
    991         } else {
                    // Quotes, '<', '=' and '`' are flagged but kept in the value.
    992             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
    993                 parseError();
    994             m_token->appendToAttributeValue(cc);
    995             HTML_ADVANCE_TO(AttributeValueUnquotedState);
    996         }
    997     }
    998     END_STATE()
    999 
   1000     HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
                // Tries to decode a character reference inside an attribute value via
                // consumeHTMLEntity(). If more input is needed the function returns
                // early; on failure a literal '&' is appended to the value; on success
                // the decoded code units are appended instead.
   1001         bool notEnoughCharacters = false;
   1002         DecodedHTMLEntity decodedEntity;
   1003         bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
   1004         if (notEnoughCharacters)
   1005             return haveBufferedCharacterToken();
   1006         if (!success) {
   1007             ASSERT(decodedEntity.isEmpty());
   1008             m_token->appendToAttributeValue('&');
   1009         } else {
   1010             for (unsigned i = 0; i < decodedEntity.length; ++i)
   1011                 m_token->appendToAttributeValue(decodedEntity.data[i]);
   1012         }
   1013         // We're supposed to switch back to the attribute value state that
   1014         // we were in when we were switched into this state. Rather than
   1015         // keeping track of this explicitly, we observe that the previous
   1016         // state can be determined by m_additionalAllowedCharacter.
   1017         if (m_additionalAllowedCharacter == '"')
   1018             HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
   1019         else if (m_additionalAllowedCharacter == '\'')
   1020             HTML_SWITCH_TO(AttributeValueSingleQuotedState);
   1021         else if (m_additionalAllowedCharacter == '>')
   1022             HTML_SWITCH_TO(AttributeValueUnquotedState);
   1023         else
   1024             ASSERT_NOT_REACHED();
   1025     }
   1026     END_STATE()
   1027 
   1028     HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
                // Handles the character right after a closing quote. Whitespace advances
                // to BeforeAttributeNameState, '/' to SelfClosingStartTagState, and '>'
                // emits the token. The end-of-file marker is a parse error reconsumed in
                // DataState; any other character is a parse error reconsumed in
                // BeforeAttributeNameState.
   1029         if (isTokenizerWhitespace(cc))
   1030             HTML_ADVANCE_TO(BeforeAttributeNameState);
   1031         else if (cc == '/')
   1032             HTML_ADVANCE_TO(SelfClosingStartTagState);
   1033         else if (cc == '>')
   1034             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1035         else if (cc == kEndOfFileMarker) {
   1036             parseError();
   1037             HTML_RECONSUME_IN(DataState);
   1038         } else {
   1039             parseError();
   1040             HTML_RECONSUME_IN(BeforeAttributeNameState);
   1041         }
   1042     }
   1043     END_STATE()
   1044 
   1045     HTML_BEGIN_STATE(SelfClosingStartTagState) {
                // Entered after '/' inside a tag. A following '>' marks the token as
                // self-closing and emits it. The end-of-file marker is a parse error
                // reconsumed in DataState; any other character is a parse error
                // reconsumed in BeforeAttributeNameState.
   1046         if (cc == '>') {
   1047             m_token->setSelfClosing();
   1048             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1049         } else if (cc == kEndOfFileMarker) {
   1050             parseError();
   1051             HTML_RECONSUME_IN(DataState);
   1052         } else {
   1053             parseError();
   1054             HTML_RECONSUME_IN(BeforeAttributeNameState);
   1055         }
   1056     }
   1057     END_STATE()
   1058 
   1059     HTML_BEGIN_STATE(BogusCommentState) {
                // Starts a comment token and lets ContinueBogusCommentState process the
                // current character, so beginComment() runs once per bogus comment.
   1060         m_token->beginComment();
   1061         HTML_RECONSUME_IN(ContinueBogusCommentState);
   1062     }
   1063     END_STATE()
   1064 
   1065     HTML_BEGIN_STATE(ContinueBogusCommentState) {
                // Appends characters to the bogus comment until '>' (emit and resume in
                // DataState) or the end-of-file marker (emit and reconsume in
                // DataState).
   1066         if (cc == '>')
   1067             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1068         else if (cc == kEndOfFileMarker)
   1069             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1070         else {
   1071             m_token->appendToComment(cc);
   1072             HTML_ADVANCE_TO(ContinueBogusCommentState);
   1073         }
   1074     }
   1075     END_STATE()
   1076 
   1077     HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
                // Uses look-ahead on the input to recognise a comment opener (two
                // dashes), a case-insensitive "doctype" keyword, or "[CDATA[" (only
                // when shouldAllowCDATA() is true). If the look-ahead reports
                // NotEnoughCharacters, the function returns and waits for more input.
                // When nothing matches, the input is treated as a bogus comment.
   1078         if (cc == '-') {
   1079             SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash);
   1080             if (result == SegmentedString::DidMatch) {
   1081                 source.advanceAndASSERT('-');
   1082                 source.advanceAndASSERT('-');
   1083                 m_token->beginComment();
   1084                 HTML_SWITCH_TO(CommentStartState);
   1085             } else if (result == SegmentedString::NotEnoughCharacters)
   1086                 return haveBufferedCharacterToken();
   1087         } else if (cc == 'D' || cc == 'd') {
   1088             SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(HTMLTokenizerNames::doctype);
   1089             if (result == SegmentedString::DidMatch) {
   1090                 advanceStringAndASSERTIgnoringCase(source, "doctype");
   1091                 HTML_SWITCH_TO(DOCTYPEState);
   1092             } else if (result == SegmentedString::NotEnoughCharacters)
   1093                 return haveBufferedCharacterToken();
   1094         } else if (cc == '[' && shouldAllowCDATA()) {
   1095             SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::cdata);
   1096             if (result == SegmentedString::DidMatch) {
   1097                 advanceStringAndASSERT(source, "[CDATA[");
   1098                 HTML_SWITCH_TO(CDATASectionState);
   1099             } else if (result == SegmentedString::NotEnoughCharacters)
   1100                 return haveBufferedCharacterToken();
   1101         }
                // Reached when no look-ahead matched. NOTE(review): this presumes
                // HTML_SWITCH_TO leaves the block on a match; the macro is defined
                // outside this chunk.
   1102         parseError();
   1103         HTML_RECONSUME_IN(BogusCommentState);
   1104     }
   1105     END_STATE()
   1106 
   1107     HTML_BEGIN_STATE(CommentStartState) {
                // First character of a comment body. '-' advances to
                // CommentStartDashState; '>' and the end-of-file marker are parse
                // errors that emit the (empty) comment; anything else is appended to
                // the comment and advances to CommentState.
   1108         if (cc == '-')
   1109             HTML_ADVANCE_TO(CommentStartDashState);
   1110         else if (cc == '>') {
   1111             parseError();
   1112             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1113         } else if (cc == kEndOfFileMarker) {
   1114             parseError();
   1115             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1116         } else {
   1117             m_token->appendToComment(cc);
   1118             HTML_ADVANCE_TO(CommentState);
   1119         }
   1120     }
   1121     END_STATE()
   1122 
   1123     HTML_BEGIN_STATE(CommentStartDashState) {
                // Entered from CommentStartState after a '-'. A second '-' advances to
                // CommentEndState; '>' and the end-of-file marker are parse errors that
                // emit the comment. Otherwise the pending '-' and cc are both appended
                // to the comment and the tokenizer advances to CommentState.
   1124         if (cc == '-')
   1125             HTML_ADVANCE_TO(CommentEndState);
   1126         else if (cc == '>') {
   1127             parseError();
   1128             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1129         } else if (cc == kEndOfFileMarker) {
   1130             parseError();
   1131             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1132         } else {
   1133             m_token->appendToComment('-');
   1134             m_token->appendToComment(cc);
   1135             HTML_ADVANCE_TO(CommentState);
   1136         }
   1137     }
   1138     END_STATE()
   1139 
   1140     HTML_BEGIN_STATE(CommentState) {
                // Main comment body loop: '-' advances to CommentEndDashState without
                // being appended yet, the end-of-file marker is a parse error that
                // emits the comment, and every other character is appended.
   1141         if (cc == '-')
   1142             HTML_ADVANCE_TO(CommentEndDashState);
   1143         else if (cc == kEndOfFileMarker) {
   1144             parseError();
   1145             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1146         } else {
   1147             m_token->appendToComment(cc);
   1148             HTML_ADVANCE_TO(CommentState);
   1149         }
   1150     }
   1151     END_STATE()
   1152 
   1153     HTML_BEGIN_STATE(CommentEndDashState) {
                // Entered after one '-' inside a comment. A second '-' advances to
                // CommentEndState; the end-of-file marker is a parse error that emits
                // the comment. Otherwise the held-back '-' and cc are appended and the
                // tokenizer returns to CommentState.
   1154         if (cc == '-')
   1155             HTML_ADVANCE_TO(CommentEndState);
   1156         else if (cc == kEndOfFileMarker) {
   1157             parseError();
   1158             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1159         } else {
   1160             m_token->appendToComment('-');
   1161             m_token->appendToComment(cc);
   1162             HTML_ADVANCE_TO(CommentState);
   1163         }
   1164     }
   1165     END_STATE()
   1166 
   1167     HTML_BEGIN_STATE(CommentEndState) {
                // Entered after "--". '>' emits the comment. '!' is a parse error that
                // advances to CommentEndBangState. An extra '-' is a parse error: one
                // '-' is appended and the state is kept. The end-of-file marker is a
                // parse error that emits the comment. Anything else is a parse error:
                // the held-back "--" and cc are appended and the tokenizer returns to
                // CommentState.
   1168         if (cc == '>')
   1169             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1170         else if (cc == '!') {
   1171             parseError();
   1172             HTML_ADVANCE_TO(CommentEndBangState);
   1173         } else if (cc == '-') {
   1174             parseError();
   1175             m_token->appendToComment('-');
   1176             HTML_ADVANCE_TO(CommentEndState);
   1177         } else if (cc == kEndOfFileMarker) {
   1178             parseError();
   1179             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1180         } else {
   1181             parseError();
   1182             m_token->appendToComment('-');
   1183             m_token->appendToComment('-');
   1184             m_token->appendToComment(cc);
   1185             HTML_ADVANCE_TO(CommentState);
   1186         }
   1187     }
   1188     END_STATE()
   1189 
   1190     HTML_BEGIN_STATE(CommentEndBangState) {
                // Entered after "--!". '-' appends the held-back "--!" and advances to
                // CommentEndDashState; '>' emits the comment without appending "--!";
                // the end-of-file marker is a parse error that emits the comment.
                // Anything else appends "--!" followed by cc and returns to
                // CommentState.
   1191         if (cc == '-') {
   1192             m_token->appendToComment('-');
   1193             m_token->appendToComment('-');
   1194             m_token->appendToComment('!');
   1195             HTML_ADVANCE_TO(CommentEndDashState);
   1196         } else if (cc == '>')
   1197             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1198         else if (cc == kEndOfFileMarker) {
   1199             parseError();
   1200             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1201         } else {
   1202             m_token->appendToComment('-');
   1203             m_token->appendToComment('-');
   1204             m_token->appendToComment('!');
   1205             m_token->appendToComment(cc);
   1206             HTML_ADVANCE_TO(CommentState);
   1207         }
   1208     }
   1209     END_STATE()
   1210 
   1211     HTML_BEGIN_STATE(DOCTYPEState) {
                // Entered right after the "doctype" keyword. Whitespace advances to
                // BeforeDOCTYPENameState. The end-of-file marker is a parse error: a
                // DOCTYPE token is begun, flagged force-quirks and emitted. Any other
                // character is a parse error reconsumed in BeforeDOCTYPENameState.
   1212         if (isTokenizerWhitespace(cc))
   1213             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
   1214         else if (cc == kEndOfFileMarker) {
   1215             parseError();
   1216             m_token->beginDOCTYPE();
   1217             m_token->setForceQuirks();
   1218             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1219         } else {
   1220             parseError();
   1221             HTML_RECONSUME_IN(BeforeDOCTYPENameState);
   1222         }
   1223     }
   1224     END_STATE()
   1225 
   1226     HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
                // Skips whitespace before the DOCTYPE name. '>' and the end-of-file
                // marker are parse errors that begin an unnamed DOCTYPE token, flag it
                // force-quirks and emit it. Any other character begins the DOCTYPE
                // token with cc as the first name character (upper-case ASCII letters
                // are lower-cased) and advances to DOCTYPENameState.
   1227         if (isTokenizerWhitespace(cc))
   1228             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
   1229         else if (isASCIIUpper(cc)) {
   1230             m_token->beginDOCTYPE(toLowerCase(cc));
   1231             HTML_ADVANCE_TO(DOCTYPENameState);
   1232         } else if (cc == '>') {
   1233             parseError();
   1234             m_token->beginDOCTYPE();
   1235             m_token->setForceQuirks();
   1236             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1237         } else if (cc == kEndOfFileMarker) {
   1238             parseError();
   1239             m_token->beginDOCTYPE();
   1240             m_token->setForceQuirks();
   1241             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1242         } else {
   1243             m_token->beginDOCTYPE(cc);
   1244             HTML_ADVANCE_TO(DOCTYPENameState);
   1245         }
   1246     }
   1247     END_STATE()
   1248 
   1249     HTML_BEGIN_STATE(DOCTYPENameState) {
                // Accumulates the DOCTYPE name. Whitespace advances to
                // AfterDOCTYPENameState, '>' emits the token, and the end-of-file
                // marker is a parse error that sets force-quirks and emits. Other
                // characters are appended to the name, with upper-case ASCII letters
                // lower-cased.
   1250         if (isTokenizerWhitespace(cc))
   1251             HTML_ADVANCE_TO(AfterDOCTYPENameState);
   1252         else if (cc == '>')
   1253             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1254         else if (isASCIIUpper(cc)) {
   1255             m_token->appendToName(toLowerCase(cc));
   1256             HTML_ADVANCE_TO(DOCTYPENameState);
   1257         } else if (cc == kEndOfFileMarker) {
   1258             parseError();
   1259             m_token->setForceQuirks();
   1260             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1261         } else {
   1262             m_token->appendToName(cc);
   1263             HTML_ADVANCE_TO(DOCTYPENameState);
   1264         }
   1265     }
   1266     END_STATE()
   1267 
   1268     HTML_BEGIN_STATE(AfterDOCTYPENameState) {
                // Handles what follows the DOCTYPE name. Whitespace is skipped, '>'
                // emits the token, and the end-of-file marker is a parse error that
                // sets force-quirks and emits. Otherwise a case-insensitive look-ahead
                // checks for the "public" or "system" keyword; if neither matches, the
                // DOCTYPE is flagged force-quirks and treated as bogus.
   1269         if (isTokenizerWhitespace(cc))
   1270             HTML_ADVANCE_TO(AfterDOCTYPENameState);
                // Chained with "else" like every sibling state, so the whitespace
                // branch is exclusive even if HTML_ADVANCE_TO were to fall through.
   1271         else if (cc == '>')
   1272             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1273         else if (cc == kEndOfFileMarker) {
   1274             parseError();
   1275             m_token->setForceQuirks();
   1276             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1277         } else {
   1278             if (cc == 'P' || cc == 'p') {
   1279                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(HTMLTokenizerNames::publicString);
   1280                 if (result == SegmentedString::DidMatch) {
   1281                     advanceStringAndASSERTIgnoringCase(source, "public");
   1282                     HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
   1283                 } else if (result == SegmentedString::NotEnoughCharacters)
   1284                     return haveBufferedCharacterToken();
   1285             } else if (cc == 'S' || cc == 's') {
   1286                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(HTMLTokenizerNames::system);
   1287                 if (result == SegmentedString::DidMatch) {
   1288                     advanceStringAndASSERTIgnoringCase(source, "system");
   1289                     HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
   1290                 } else if (result == SegmentedString::NotEnoughCharacters)
   1291                     return haveBufferedCharacterToken();
   1292             }
                    // Neither keyword matched: the DOCTYPE is malformed.
   1293             parseError();
   1294             m_token->setForceQuirks();
   1295             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1296         }
   1297     }
   1298     END_STATE()
   1299 
   1300     HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
                // Entered after the "public" keyword. Whitespace advances to
                // BeforeDOCTYPEPublicIdentifierState. A quote directly after the
                // keyword is a parse error but still starts an empty public identifier
                // in the matching quoted state. '>' and the end-of-file marker are
                // parse errors that set force-quirks and emit; anything else sets
                // force-quirks and advances to BogusDOCTYPEState.
   1301         if (isTokenizerWhitespace(cc))
   1302             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
   1303         else if (cc == '"') {
   1304             parseError();
   1305             m_token->setPublicIdentifierToEmptyString();
   1306             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1307         } else if (cc == '\'') {
   1308             parseError();
   1309             m_token->setPublicIdentifierToEmptyString();
   1310             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1311         } else if (cc == '>') {
   1312             parseError();
   1313             m_token->setForceQuirks();
   1314             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1315         } else if (cc == kEndOfFileMarker) {
   1316             parseError();
   1317             m_token->setForceQuirks();
   1318             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1319         } else {
   1320             parseError();
   1321             m_token->setForceQuirks();
   1322             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1323         }
   1324     }
   1325     END_STATE()
   1326 
   1327     HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
                // Skips whitespace before the public identifier. A quote starts an
                // empty public identifier in the matching quoted state (no parse error
                // here). '>' and the end-of-file marker are parse errors that set
                // force-quirks and emit; anything else sets force-quirks and advances
                // to BogusDOCTYPEState.
   1328         if (isTokenizerWhitespace(cc))
   1329             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
   1330         else if (cc == '"') {
   1331             m_token->setPublicIdentifierToEmptyString();
   1332             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1333         } else if (cc == '\'') {
   1334             m_token->setPublicIdentifierToEmptyString();
   1335             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1336         } else if (cc == '>') {
   1337             parseError();
   1338             m_token->setForceQuirks();
   1339             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1340         } else if (cc == kEndOfFileMarker) {
   1341             parseError();
   1342             m_token->setForceQuirks();
   1343             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1344         } else {
   1345             parseError();
   1346             m_token->setForceQuirks();
   1347             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1348         }
   1349     }
   1350     END_STATE()
   1351 
   1352     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
                // Reads a double-quoted public identifier. The closing '"' advances to
                // AfterDOCTYPEPublicIdentifierState; '>' and the end-of-file marker are
                // parse errors that set force-quirks and emit; every other character is
                // appended to the public identifier.
   1353         if (cc == '"')
   1354             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
   1355         else if (cc == '>') {
   1356             parseError();
   1357             m_token->setForceQuirks();
   1358             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1359         } else if (cc == kEndOfFileMarker) {
   1360             parseError();
   1361             m_token->setForceQuirks();
   1362             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1363         } else {
   1364             m_token->appendToPublicIdentifier(cc);
   1365             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1366         }
   1367     }
   1368     END_STATE()
   1369 
   1370     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
                // Reads a single-quoted public identifier. The closing '\'' advances to
                // AfterDOCTYPEPublicIdentifierState; '>' and the end-of-file marker are
                // parse errors that set force-quirks and emit; every other character is
                // appended to the public identifier.
   1371         if (cc == '\'')
   1372             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
   1373         else if (cc == '>') {
   1374             parseError();
   1375             m_token->setForceQuirks();
   1376             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1377         } else if (cc == kEndOfFileMarker) {
   1378             parseError();
   1379             m_token->setForceQuirks();
   1380             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1381         } else {
   1382             m_token->appendToPublicIdentifier(cc);
   1383             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1384         }
   1385     }
   1386     END_STATE()
   1387 
   1388     HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
                // Entered after the closing quote of the public identifier. Whitespace
                // advances to BetweenDOCTYPEPublicAndSystemIdentifiersState and '>'
                // emits the token. A quote with no separating whitespace is a parse
                // error but still starts an empty system identifier. The end-of-file
                // marker is a parse error that sets force-quirks and emits; anything
                // else sets force-quirks and advances to BogusDOCTYPEState.
   1389         if (isTokenizerWhitespace(cc))
   1390             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
   1391         else if (cc == '>')
   1392             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1393         else if (cc == '"') {
   1394             parseError();
   1395             m_token->setSystemIdentifierToEmptyString();
   1396             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1397         } else if (cc == '\'') {
   1398             parseError();
   1399             m_token->setSystemIdentifierToEmptyString();
   1400             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1401         } else if (cc == kEndOfFileMarker) {
   1402             parseError();
   1403             m_token->setForceQuirks();
   1404             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1405         } else {
   1406             parseError();
   1407             m_token->setForceQuirks();
   1408             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1409         }
   1410     }
   1411     END_STATE()
   1412 
   1413     HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
   1414         if (isTokenizerWhitespace(cc))
   1415             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
   1416         else if (cc == '>')
   1417             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1418         else if (cc == '"') {
   1419             m_token->setSystemIdentifierToEmptyString();
   1420             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1421         } else if (cc == '\'') {
   1422             m_token->setSystemIdentifierToEmptyString();
   1423             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1424         } else if (cc == kEndOfFileMarker) {
   1425             parseError();
   1426             m_token->setForceQuirks();
   1427             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1428         } else {
   1429             parseError();
   1430             m_token->setForceQuirks();
   1431             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1432         }
   1433     }
   1434     END_STATE()
   1435 
   1436     HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
   1437         if (isTokenizerWhitespace(cc))
   1438             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
   1439         else if (cc == '"') {
   1440             parseError();
   1441             m_token->setSystemIdentifierToEmptyString();
   1442             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1443         } else if (cc == '\'') {
   1444             parseError();
   1445             m_token->setSystemIdentifierToEmptyString();
   1446             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1447         } else if (cc == '>') {
   1448             parseError();
   1449             m_token->setForceQuirks();
   1450             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1451         } else if (cc == kEndOfFileMarker) {
   1452             parseError();
   1453             m_token->setForceQuirks();
   1454             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1455         } else {
   1456             parseError();
   1457             m_token->setForceQuirks();
   1458             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1459         }
   1460     }
   1461     END_STATE()
   1462 
   1463     HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
   1464         if (isTokenizerWhitespace(cc))
   1465             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
   1466         if (cc == '"') {
   1467             m_token->setSystemIdentifierToEmptyString();
   1468             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1469         } else if (cc == '\'') {
   1470             m_token->setSystemIdentifierToEmptyString();
   1471             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1472         } else if (cc == '>') {
   1473             parseError();
   1474             m_token->setForceQuirks();
   1475             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1476         } else if (cc == kEndOfFileMarker) {
   1477             parseError();
   1478             m_token->setForceQuirks();
   1479             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1480         } else {
   1481             parseError();
   1482             m_token->setForceQuirks();
   1483             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1484         }
   1485     }
   1486     END_STATE()
   1487 
   1488     HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
   1489         if (cc == '"')
   1490             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1491         else if (cc == '>') {
   1492             parseError();
   1493             m_token->setForceQuirks();
   1494             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1495         } else if (cc == kEndOfFileMarker) {
   1496             parseError();
   1497             m_token->setForceQuirks();
   1498             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1499         } else {
   1500             m_token->appendToSystemIdentifier(cc);
   1501             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1502         }
   1503     }
   1504     END_STATE()
   1505 
   1506     HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
   1507         if (cc == '\'')
   1508             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1509         else if (cc == '>') {
   1510             parseError();
   1511             m_token->setForceQuirks();
   1512             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1513         } else if (cc == kEndOfFileMarker) {
   1514             parseError();
   1515             m_token->setForceQuirks();
   1516             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1517         } else {
   1518             m_token->appendToSystemIdentifier(cc);
   1519             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1520         }
   1521     }
   1522     END_STATE()
   1523 
   1524     HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
   1525         if (isTokenizerWhitespace(cc))
   1526             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1527         else if (cc == '>')
   1528             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1529         else if (cc == kEndOfFileMarker) {
   1530             parseError();
   1531             m_token->setForceQuirks();
   1532             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1533         } else {
   1534             parseError();
   1535             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1536         }
   1537     }
   1538     END_STATE()
   1539 
   1540     HTML_BEGIN_STATE(BogusDOCTYPEState) {
   1541         if (cc == '>')
   1542             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1543         else if (cc == kEndOfFileMarker)
   1544             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1545         HTML_ADVANCE_TO(BogusDOCTYPEState);
   1546     }
   1547     END_STATE()
   1548 
   1549     HTML_BEGIN_STATE(CDATASectionState) {
   1550         if (cc == ']')
   1551             HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
   1552         else if (cc == kEndOfFileMarker)
   1553             HTML_RECONSUME_IN(DataState);
   1554         else {
   1555             bufferCharacter(cc);
   1556             HTML_ADVANCE_TO(CDATASectionState);
   1557         }
   1558     }
   1559     END_STATE()
   1560 
   1561     HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
   1562         if (cc == ']')
   1563             HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
   1564         else {
   1565             bufferCharacter(']');
   1566             HTML_RECONSUME_IN(CDATASectionState);
   1567         }
   1568     }
   1569 
   1570     HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
   1571         if (cc == '>')
   1572             HTML_ADVANCE_TO(DataState);
   1573         else {
   1574             bufferCharacter(']');
   1575             bufferCharacter(']');
   1576             HTML_RECONSUME_IN(CDATASectionState);
   1577         }
   1578     }
   1579     END_STATE()
   1580 
   1581     }
   1582 
   1583     ASSERT_NOT_REACHED();
   1584     return false;
   1585 }
   1586 
   1587 String HTMLTokenizer::bufferedCharacters() const
   1588 {
   1589     // FIXME: Add an assert about m_state.
   1590     StringBuilder characters;
   1591     characters.reserveCapacity(numberOfBufferedCharacters());
   1592     characters.append('<');
   1593     characters.append('/');
   1594     characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size());
   1595     return characters.toString();
   1596 }
   1597 
   1598 void HTMLTokenizer::updateStateFor(const String& tagName)
   1599 {
   1600     if (threadSafeMatch(tagName, textareaTag) || threadSafeMatch(tagName, titleTag))
   1601         setState(HTMLTokenizer::RCDATAState);
   1602     else if (threadSafeMatch(tagName, plaintextTag))
   1603         setState(HTMLTokenizer::PLAINTEXTState);
   1604     else if (threadSafeMatch(tagName, scriptTag))
   1605         setState(HTMLTokenizer::ScriptDataState);
   1606     else if (threadSafeMatch(tagName, styleTag)
   1607         || threadSafeMatch(tagName, iframeTag)
   1608         || threadSafeMatch(tagName, xmpTag)
   1609         || (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled)
   1610         || threadSafeMatch(tagName, noframesTag)
   1611         || (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))
   1612         setState(HTMLTokenizer::RAWTEXTState);
   1613 }
   1614 
   1615 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
   1616 {
   1617     return vectorEqualsString(m_temporaryBuffer, expectedString);
   1618 }
   1619 
   1620 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc)
   1621 {
   1622     ASSERT(isEndTagBufferingState(m_state));
   1623     m_bufferedEndTagName.append(cc);
   1624 }
   1625 
   1626 inline bool HTMLTokenizer::isAppropriateEndTag()
   1627 {
   1628     if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
   1629         return false;
   1630 
   1631     size_t numCharacters = m_bufferedEndTagName.size();
   1632 
   1633     for (size_t i = 0; i < numCharacters; i++) {
   1634         if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
   1635             return false;
   1636     }
   1637 
   1638     return true;
   1639 }
   1640 
   1641 inline void HTMLTokenizer::parseError()
   1642 {
   1643     notImplemented();
   1644 }
   1645 
   1646 }
   1647