Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
      3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
      4  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/html/parser/HTMLTokenizer.h"
     30 
     31 #include "HTMLNames.h"
     32 #include "core/html/parser/HTMLEntityParser.h"
     33 #include "core/html/parser/HTMLToken.h"
     34 #include "core/html/parser/HTMLTreeBuilder.h"
     35 #include "core/platform/NotImplemented.h"
     36 #include "core/xml/parser/MarkupTokenizerInlines.h"
     37 #include "wtf/ASCIICType.h"
     38 #include "wtf/text/AtomicString.h"
     39 #include "wtf/unicode/Unicode.h"
     40 
     41 using namespace WTF;
     42 
     43 namespace WebCore {
     44 
     45 using namespace HTMLNames;
     46 
     47 // This has to go in a .cpp file, as the linker doesn't like it being included more than once.
     48 // We don't have an HTMLToken.cpp though, so this is the next best place.
     49 QualifiedName AtomicHTMLToken::nameForAttribute(const HTMLToken::Attribute& attribute) const
     50 {
     51     return QualifiedName(nullAtom, AtomicString(attribute.name), nullAtom);
     52 }
     53 
     54 bool AtomicHTMLToken::usesName() const
     55 {
     56     return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE;
     57 }
     58 
     59 bool AtomicHTMLToken::usesAttributes() const
     60 {
     61     return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
     62 }
     63 
     64 static inline UChar toLowerCase(UChar cc)
     65 {
     66     ASSERT(isASCIIUpper(cc));
     67     const int lowerCaseOffset = 0x20;
     68     return cc + lowerCaseOffset;
     69 }
     70 
     71 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string)
     72 {
     73     if (vector.size() != string.length())
     74         return false;
     75 
     76     if (!string.length())
     77         return true;
     78 
     79     return equal(string.impl(), vector.data(), vector.size());
     80 }
     81 
     82 static inline bool isEndTagBufferingState(HTMLTokenizer::State state)
     83 {
     84     switch (state) {
     85     case HTMLTokenizer::RCDATAEndTagOpenState:
     86     case HTMLTokenizer::RCDATAEndTagNameState:
     87     case HTMLTokenizer::RAWTEXTEndTagOpenState:
     88     case HTMLTokenizer::RAWTEXTEndTagNameState:
     89     case HTMLTokenizer::ScriptDataEndTagOpenState:
     90     case HTMLTokenizer::ScriptDataEndTagNameState:
     91     case HTMLTokenizer::ScriptDataEscapedEndTagOpenState:
     92     case HTMLTokenizer::ScriptDataEscapedEndTagNameState:
     93         return true;
     94     default:
     95         return false;
     96     }
     97 }
     98 
// Shorthands for the generic state-machine macros (BEGIN_STATE, RECONSUME_IN,
// ADVANCE_TO, SWITCH_TO) with HTMLTokenizer fixed as the first argument, so the
// state bodies below only have to name the target state.
// NOTE(review): the underlying macros are presumably defined in
// MarkupTokenizerInlines.h — confirm.
#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
    103 
    104 HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
    105     : m_inputStreamPreprocessor(this)
    106     , m_options(options)
    107 {
    108     reset();
    109 }
    110 
// Destructor. The body is intentionally empty: nothing is released explicitly here.
HTMLTokenizer::~HTMLTokenizer()
{
}
    114 
    115 void HTMLTokenizer::reset()
    116 {
    117     m_state = HTMLTokenizer::DataState;
    118     m_token = 0;
    119     m_forceNullCharacterReplacement = false;
    120     m_shouldAllowCDATA = false;
    121     m_additionalAllowedCharacter = '\0';
    122 }
    123 
    124 bool HTMLTokenizer::canCreateCheckpoint() const
    125 {
    126     if (!m_appropriateEndTagName.isEmpty())
    127         return false;
    128     if (!m_temporaryBuffer.isEmpty())
    129         return false;
    130     if (!m_bufferedEndTagName.isEmpty())
    131         return false;
    132     return true;
    133 }
    134 
    135 void HTMLTokenizer::createCheckpoint(Checkpoint& result) const
    136 {
    137     ASSERT(canCreateCheckpoint());
    138     result.options = m_options;
    139     result.state = m_state;
    140     result.additionalAllowedCharacter = m_additionalAllowedCharacter;
    141     result.skipNextNewLine = m_inputStreamPreprocessor.skipNextNewLine();
    142     result.shouldAllowCDATA = m_shouldAllowCDATA;
    143 }
    144 
    145 void HTMLTokenizer::restoreFromCheckpoint(const Checkpoint& checkpoint)
    146 {
    147     m_token = 0;
    148     m_options = checkpoint.options;
    149     m_state = checkpoint.state;
    150     m_additionalAllowedCharacter = checkpoint.additionalAllowedCharacter;
    151     m_inputStreamPreprocessor.reset(checkpoint.skipNextNewLine);
    152     m_shouldAllowCDATA = checkpoint.shouldAllowCDATA;
    153 }
    154 
    155 inline bool HTMLTokenizer::processEntity(SegmentedString& source)
    156 {
    157     bool notEnoughCharacters = false;
    158     DecodedHTMLEntity decodedEntity;
    159     bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
    160     if (notEnoughCharacters)
    161         return false;
    162     if (!success) {
    163         ASSERT(decodedEntity.isEmpty());
    164         bufferCharacter('&');
    165     } else {
    166         for (unsigned i = 0; i < decodedEntity.length; ++i)
    167             bufferCharacter(decodedEntity.data[i]);
    168     }
    169     return true;
    170 }
    171 
    172 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
    173 {
    174     ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
    175     source.advanceAndUpdateLineNumber();
    176     if (m_token->type() == HTMLToken::Character)
    177         return true;
    178     m_token->beginEndTag(m_bufferedEndTagName);
    179     m_bufferedEndTagName.clear();
    180     m_appropriateEndTagName.clear();
    181     m_temporaryBuffer.clear();
    182     return false;
    183 }
    184 
// Used by the *EndTagName states once the buffered end tag has been accepted.
// Sets m_state to |stateName| and calls flushBufferedEndTag(source). If that
// returns true, the enclosing function returns true. Otherwise, if no further
// input character can be peeked, the enclosing function returns
// haveBufferedCharacterToken(); else the next input character is loaded into
// |cc| and control jumps to the |stateName| label.
// Expects |source| and |cc| to be in scope at the expansion site.
// NOTE(review): the |stateName| label is presumably introduced by
// HTML_BEGIN_STATE — confirm.
#define FLUSH_AND_ADVANCE_TO(stateName)                                    \
    do {                                                                   \
        m_state = HTMLTokenizer::stateName;                           \
        if (flushBufferedEndTag(source))                                   \
            return true;                                                   \
        if (source.isEmpty()                                               \
            || !m_inputStreamPreprocessor.peek(source))                    \
            return haveBufferedCharacterToken();                           \
        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
        goto stateName;                                                    \
    } while (false)
    196 
    197 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, HTMLTokenizer::State state)
    198 {
    199     m_state = state;
    200     flushBufferedEndTag(source);
    201     return true;
    202 }
    203 
    204 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
    205 {
    206     // If we have a token in progress, then we're supposed to be called back
    207     // with the same token so we can finish it.
    208     ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
    209     m_token = &token;
    210 
    211     if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
    212         // FIXME: This should call flushBufferedEndTag().
    213         // We started an end tag during our last iteration.
    214         m_token->beginEndTag(m_bufferedEndTagName);
    215         m_bufferedEndTagName.clear();
    216         m_appropriateEndTagName.clear();
    217         m_temporaryBuffer.clear();
    218         if (m_state == HTMLTokenizer::DataState) {
    219             // We're back in the data state, so we must be done with the tag.
    220             return true;
    221         }
    222     }
    223 
    224     if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
    225         return haveBufferedCharacterToken();
    226     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
    227 
    228     // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
    229     switch (m_state) {
    230     HTML_BEGIN_STATE(DataState) {
    231         if (cc == '&')
    232             HTML_ADVANCE_TO(CharacterReferenceInDataState);
    233         else if (cc == '<') {
    234             if (m_token->type() == HTMLToken::Character) {
    235                 // We have a bunch of character tokens queued up that we
    236                 // are emitting lazily here.
    237                 return true;
    238             }
    239             HTML_ADVANCE_TO(TagOpenState);
    240         } else if (cc == kEndOfFileMarker)
    241             return emitEndOfFile(source);
    242         else {
    243             bufferCharacter(cc);
    244             HTML_ADVANCE_TO(DataState);
    245         }
    246     }
    247     END_STATE()
    248 
    249     HTML_BEGIN_STATE(CharacterReferenceInDataState) {
    250         if (!processEntity(source))
    251             return haveBufferedCharacterToken();
    252         HTML_SWITCH_TO(DataState);
    253     }
    254     END_STATE()
    255 
    256     HTML_BEGIN_STATE(RCDATAState) {
    257         if (cc == '&')
    258             HTML_ADVANCE_TO(CharacterReferenceInRCDATAState);
    259         else if (cc == '<')
    260             HTML_ADVANCE_TO(RCDATALessThanSignState);
    261         else if (cc == kEndOfFileMarker)
    262             return emitEndOfFile(source);
    263         else {
    264             bufferCharacter(cc);
    265             HTML_ADVANCE_TO(RCDATAState);
    266         }
    267     }
    268     END_STATE()
    269 
    270     HTML_BEGIN_STATE(CharacterReferenceInRCDATAState) {
    271         if (!processEntity(source))
    272             return haveBufferedCharacterToken();
    273         HTML_SWITCH_TO(RCDATAState);
    274     }
    275     END_STATE()
    276 
    277     HTML_BEGIN_STATE(RAWTEXTState) {
    278         if (cc == '<')
    279             HTML_ADVANCE_TO(RAWTEXTLessThanSignState);
    280         else if (cc == kEndOfFileMarker)
    281             return emitEndOfFile(source);
    282         else {
    283             bufferCharacter(cc);
    284             HTML_ADVANCE_TO(RAWTEXTState);
    285         }
    286     }
    287     END_STATE()
    288 
    289     HTML_BEGIN_STATE(ScriptDataState) {
    290         if (cc == '<')
    291             HTML_ADVANCE_TO(ScriptDataLessThanSignState);
    292         else if (cc == kEndOfFileMarker)
    293             return emitEndOfFile(source);
    294         else {
    295             bufferCharacter(cc);
    296             HTML_ADVANCE_TO(ScriptDataState);
    297         }
    298     }
    299     END_STATE()
    300 
    301     HTML_BEGIN_STATE(PLAINTEXTState) {
    302         if (cc == kEndOfFileMarker)
    303             return emitEndOfFile(source);
    304         bufferCharacter(cc);
    305         HTML_ADVANCE_TO(PLAINTEXTState);
    306     }
    307     END_STATE()
    308 
    309     HTML_BEGIN_STATE(TagOpenState) {
    310         if (cc == '!')
    311             HTML_ADVANCE_TO(MarkupDeclarationOpenState);
    312         else if (cc == '/')
    313             HTML_ADVANCE_TO(EndTagOpenState);
    314         else if (isASCIIUpper(cc)) {
    315             m_token->beginStartTag(toLowerCase(cc));
    316             HTML_ADVANCE_TO(TagNameState);
    317         } else if (isASCIILower(cc)) {
    318             m_token->beginStartTag(cc);
    319             HTML_ADVANCE_TO(TagNameState);
    320         } else if (cc == '?') {
    321             parseError();
    322             // The spec consumes the current character before switching
    323             // to the bogus comment state, but it's easier to implement
    324             // if we reconsume the current character.
    325             HTML_RECONSUME_IN(BogusCommentState);
    326         } else {
    327             parseError();
    328             bufferCharacter('<');
    329             HTML_RECONSUME_IN(DataState);
    330         }
    331     }
    332     END_STATE()
    333 
    334     HTML_BEGIN_STATE(EndTagOpenState) {
    335         if (isASCIIUpper(cc)) {
    336             m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
    337             m_appropriateEndTagName.clear();
    338             HTML_ADVANCE_TO(TagNameState);
    339         } else if (isASCIILower(cc)) {
    340             m_token->beginEndTag(static_cast<LChar>(cc));
    341             m_appropriateEndTagName.clear();
    342             HTML_ADVANCE_TO(TagNameState);
    343         } else if (cc == '>') {
    344             parseError();
    345             HTML_ADVANCE_TO(DataState);
    346         } else if (cc == kEndOfFileMarker) {
    347             parseError();
    348             bufferCharacter('<');
    349             bufferCharacter('/');
    350             HTML_RECONSUME_IN(DataState);
    351         } else {
    352             parseError();
    353             HTML_RECONSUME_IN(BogusCommentState);
    354         }
    355     }
    356     END_STATE()
    357 
    358     HTML_BEGIN_STATE(TagNameState) {
    359         if (isTokenizerWhitespace(cc))
    360             HTML_ADVANCE_TO(BeforeAttributeNameState);
    361         else if (cc == '/')
    362             HTML_ADVANCE_TO(SelfClosingStartTagState);
    363         else if (cc == '>')
    364             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    365         else if (isASCIIUpper(cc)) {
    366             m_token->appendToName(toLowerCase(cc));
    367             HTML_ADVANCE_TO(TagNameState);
    368         } else if (cc == kEndOfFileMarker) {
    369             parseError();
    370             HTML_RECONSUME_IN(DataState);
    371         } else {
    372             m_token->appendToName(cc);
    373             HTML_ADVANCE_TO(TagNameState);
    374         }
    375     }
    376     END_STATE()
    377 
    378     HTML_BEGIN_STATE(RCDATALessThanSignState) {
    379         if (cc == '/') {
    380             m_temporaryBuffer.clear();
    381             ASSERT(m_bufferedEndTagName.isEmpty());
    382             HTML_ADVANCE_TO(RCDATAEndTagOpenState);
    383         } else {
    384             bufferCharacter('<');
    385             HTML_RECONSUME_IN(RCDATAState);
    386         }
    387     }
    388     END_STATE()
    389 
    390     HTML_BEGIN_STATE(RCDATAEndTagOpenState) {
    391         if (isASCIIUpper(cc)) {
    392             m_temporaryBuffer.append(static_cast<LChar>(cc));
    393             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    394             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    395         } else if (isASCIILower(cc)) {
    396             m_temporaryBuffer.append(static_cast<LChar>(cc));
    397             addToPossibleEndTag(static_cast<LChar>(cc));
    398             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    399         } else {
    400             bufferCharacter('<');
    401             bufferCharacter('/');
    402             HTML_RECONSUME_IN(RCDATAState);
    403         }
    404     }
    405     END_STATE()
    406 
    407     HTML_BEGIN_STATE(RCDATAEndTagNameState) {
    408         if (isASCIIUpper(cc)) {
    409             m_temporaryBuffer.append(static_cast<LChar>(cc));
    410             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    411             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    412         } else if (isASCIILower(cc)) {
    413             m_temporaryBuffer.append(static_cast<LChar>(cc));
    414             addToPossibleEndTag(static_cast<LChar>(cc));
    415             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    416         } else {
    417             if (isTokenizerWhitespace(cc)) {
    418                 if (isAppropriateEndTag()) {
    419                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    420                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    421                 }
    422             } else if (cc == '/') {
    423                 if (isAppropriateEndTag()) {
    424                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    425                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    426                 }
    427             } else if (cc == '>') {
    428                 if (isAppropriateEndTag()) {
    429                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    430                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    431                 }
    432             }
    433             bufferCharacter('<');
    434             bufferCharacter('/');
    435             m_token->appendToCharacter(m_temporaryBuffer);
    436             m_bufferedEndTagName.clear();
    437             m_temporaryBuffer.clear();
    438             HTML_RECONSUME_IN(RCDATAState);
    439         }
    440     }
    441     END_STATE()
    442 
    443     HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {
    444         if (cc == '/') {
    445             m_temporaryBuffer.clear();
    446             ASSERT(m_bufferedEndTagName.isEmpty());
    447             HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);
    448         } else {
    449             bufferCharacter('<');
    450             HTML_RECONSUME_IN(RAWTEXTState);
    451         }
    452     }
    453     END_STATE()
    454 
    455     HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {
    456         if (isASCIIUpper(cc)) {
    457             m_temporaryBuffer.append(static_cast<LChar>(cc));
    458             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    459             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    460         } else if (isASCIILower(cc)) {
    461             m_temporaryBuffer.append(static_cast<LChar>(cc));
    462             addToPossibleEndTag(static_cast<LChar>(cc));
    463             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    464         } else {
    465             bufferCharacter('<');
    466             bufferCharacter('/');
    467             HTML_RECONSUME_IN(RAWTEXTState);
    468         }
    469     }
    470     END_STATE()
    471 
    472     HTML_BEGIN_STATE(RAWTEXTEndTagNameState) {
    473         if (isASCIIUpper(cc)) {
    474             m_temporaryBuffer.append(static_cast<LChar>(cc));
    475             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    476             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    477         } else if (isASCIILower(cc)) {
    478             m_temporaryBuffer.append(static_cast<LChar>(cc));
    479             addToPossibleEndTag(static_cast<LChar>(cc));
    480             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    481         } else {
    482             if (isTokenizerWhitespace(cc)) {
    483                 if (isAppropriateEndTag()) {
    484                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    485                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    486                 }
    487             } else if (cc == '/') {
    488                 if (isAppropriateEndTag()) {
    489                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    490                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    491                 }
    492             } else if (cc == '>') {
    493                 if (isAppropriateEndTag()) {
    494                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    495                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    496                 }
    497             }
    498             bufferCharacter('<');
    499             bufferCharacter('/');
    500             m_token->appendToCharacter(m_temporaryBuffer);
    501             m_bufferedEndTagName.clear();
    502             m_temporaryBuffer.clear();
    503             HTML_RECONSUME_IN(RAWTEXTState);
    504         }
    505     }
    506     END_STATE()
    507 
    508     HTML_BEGIN_STATE(ScriptDataLessThanSignState) {
    509         if (cc == '/') {
    510             m_temporaryBuffer.clear();
    511             ASSERT(m_bufferedEndTagName.isEmpty());
    512             HTML_ADVANCE_TO(ScriptDataEndTagOpenState);
    513         } else if (cc == '!') {
    514             bufferCharacter('<');
    515             bufferCharacter('!');
    516             HTML_ADVANCE_TO(ScriptDataEscapeStartState);
    517         } else {
    518             bufferCharacter('<');
    519             HTML_RECONSUME_IN(ScriptDataState);
    520         }
    521     }
    522     END_STATE()
    523 
    524     HTML_BEGIN_STATE(ScriptDataEndTagOpenState) {
    525         if (isASCIIUpper(cc)) {
    526             m_temporaryBuffer.append(static_cast<LChar>(cc));
    527             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    528             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    529         } else if (isASCIILower(cc)) {
    530             m_temporaryBuffer.append(static_cast<LChar>(cc));
    531             addToPossibleEndTag(static_cast<LChar>(cc));
    532             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    533         } else {
    534             bufferCharacter('<');
    535             bufferCharacter('/');
    536             HTML_RECONSUME_IN(ScriptDataState);
    537         }
    538     }
    539     END_STATE()
    540 
    541     HTML_BEGIN_STATE(ScriptDataEndTagNameState) {
    542         if (isASCIIUpper(cc)) {
    543             m_temporaryBuffer.append(static_cast<LChar>(cc));
    544             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    545             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    546         } else if (isASCIILower(cc)) {
    547             m_temporaryBuffer.append(static_cast<LChar>(cc));
    548             addToPossibleEndTag(static_cast<LChar>(cc));
    549             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    550         } else {
    551             if (isTokenizerWhitespace(cc)) {
    552                 if (isAppropriateEndTag()) {
    553                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    554                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    555                 }
    556             } else if (cc == '/') {
    557                 if (isAppropriateEndTag()) {
    558                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    559                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    560                 }
    561             } else if (cc == '>') {
    562                 if (isAppropriateEndTag()) {
    563                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    564                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    565                 }
    566             }
    567             bufferCharacter('<');
    568             bufferCharacter('/');
    569             m_token->appendToCharacter(m_temporaryBuffer);
    570             m_bufferedEndTagName.clear();
    571             m_temporaryBuffer.clear();
    572             HTML_RECONSUME_IN(ScriptDataState);
    573         }
    574     }
    575     END_STATE()
    576 
    577     HTML_BEGIN_STATE(ScriptDataEscapeStartState) {
    578         if (cc == '-') {
    579             bufferCharacter(cc);
    580             HTML_ADVANCE_TO(ScriptDataEscapeStartDashState);
    581         } else
    582             HTML_RECONSUME_IN(ScriptDataState);
    583     }
    584     END_STATE()
    585 
    586     HTML_BEGIN_STATE(ScriptDataEscapeStartDashState) {
    587         if (cc == '-') {
    588             bufferCharacter(cc);
    589             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    590         } else
    591             HTML_RECONSUME_IN(ScriptDataState);
    592     }
    593     END_STATE()
    594 
    595     HTML_BEGIN_STATE(ScriptDataEscapedState) {
    596         if (cc == '-') {
    597             bufferCharacter(cc);
    598             HTML_ADVANCE_TO(ScriptDataEscapedDashState);
    599         } else if (cc == '<')
    600             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    601         else if (cc == kEndOfFileMarker) {
    602             parseError();
    603             HTML_RECONSUME_IN(DataState);
    604         } else {
    605             bufferCharacter(cc);
    606             HTML_ADVANCE_TO(ScriptDataEscapedState);
    607         }
    608     }
    609     END_STATE()
    610 
    611     HTML_BEGIN_STATE(ScriptDataEscapedDashState) {
    612         if (cc == '-') {
    613             bufferCharacter(cc);
    614             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    615         } else if (cc == '<')
    616             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    617         else if (cc == kEndOfFileMarker) {
    618             parseError();
    619             HTML_RECONSUME_IN(DataState);
    620         } else {
    621             bufferCharacter(cc);
    622             HTML_ADVANCE_TO(ScriptDataEscapedState);
    623         }
    624     }
    625     END_STATE()
    626 
    627     HTML_BEGIN_STATE(ScriptDataEscapedDashDashState) {
    628         if (cc == '-') {
    629             bufferCharacter(cc);
    630             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    631         } else if (cc == '<')
    632             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    633         else if (cc == '>') {
    634             bufferCharacter(cc);
    635             HTML_ADVANCE_TO(ScriptDataState);
    636         } else if (cc == kEndOfFileMarker) {
    637             parseError();
    638             HTML_RECONSUME_IN(DataState);
    639         } else {
    640             bufferCharacter(cc);
    641             HTML_ADVANCE_TO(ScriptDataEscapedState);
    642         }
    643     }
    644     END_STATE()
    645 
    646     HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
    647         if (cc == '/') {
    648             m_temporaryBuffer.clear();
    649             ASSERT(m_bufferedEndTagName.isEmpty());
    650             HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
    651         } else if (isASCIIUpper(cc)) {
    652             bufferCharacter('<');
    653             bufferCharacter(cc);
    654             m_temporaryBuffer.clear();
    655             m_temporaryBuffer.append(toLowerCase(cc));
    656             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    657         } else if (isASCIILower(cc)) {
    658             bufferCharacter('<');
    659             bufferCharacter(cc);
    660             m_temporaryBuffer.clear();
    661             m_temporaryBuffer.append(static_cast<LChar>(cc));
    662             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    663         } else {
    664             bufferCharacter('<');
    665             HTML_RECONSUME_IN(ScriptDataEscapedState);
    666         }
    667     }
    668     END_STATE()
    669 
    670     HTML_BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
    671         if (isASCIIUpper(cc)) {
    672             m_temporaryBuffer.append(static_cast<LChar>(cc));
    673             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    674             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    675         } else if (isASCIILower(cc)) {
    676             m_temporaryBuffer.append(static_cast<LChar>(cc));
    677             addToPossibleEndTag(static_cast<LChar>(cc));
    678             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    679         } else {
    680             bufferCharacter('<');
    681             bufferCharacter('/');
    682             HTML_RECONSUME_IN(ScriptDataEscapedState);
    683         }
    684     }
    685     END_STATE()
    686 
    687     HTML_BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
    688         if (isASCIIUpper(cc)) {
    689             m_temporaryBuffer.append(static_cast<LChar>(cc));
    690             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    691             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    692         } else if (isASCIILower(cc)) {
    693             m_temporaryBuffer.append(static_cast<LChar>(cc));
    694             addToPossibleEndTag(static_cast<LChar>(cc));
    695             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    696         } else {
    697             if (isTokenizerWhitespace(cc)) {
    698                 if (isAppropriateEndTag()) {
    699                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    700                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    701                 }
    702             } else if (cc == '/') {
    703                 if (isAppropriateEndTag()) {
    704                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    705                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    706                 }
    707             } else if (cc == '>') {
    708                 if (isAppropriateEndTag()) {
    709                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    710                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    711                 }
    712             }
    713             bufferCharacter('<');
    714             bufferCharacter('/');
    715             m_token->appendToCharacter(m_temporaryBuffer);
    716             m_bufferedEndTagName.clear();
    717             m_temporaryBuffer.clear();
    718             HTML_RECONSUME_IN(ScriptDataEscapedState);
    719         }
    720     }
    721     END_STATE()
    722 
    723     HTML_BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
                // Collects a lower-cased run of ASCII letters in m_temporaryBuffer.
                // When a whitespace, '/' or '>' terminator arrives, the collected
                // name is compared with the script tag's local name to pick the
                // next state.
    724         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
    725             bufferCharacter(cc);
    726             if (temporaryBufferIs(scriptTag.localName()))
    727                 HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    728             else
    729                 HTML_ADVANCE_TO(ScriptDataEscapedState);
    730         } else if (isASCIIUpper(cc)) {
                    // The character is buffered as written; only the comparison
                    // buffer receives the lower-cased form.
    731             bufferCharacter(cc);
    732             m_temporaryBuffer.append(toLowerCase(cc));
    733             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    734         } else if (isASCIILower(cc)) {
    735             bufferCharacter(cc);
    736             m_temporaryBuffer.append(static_cast<LChar>(cc));
    737             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    738         } else
                    // Any other character is not buffered here and is reconsumed
                    // in ScriptDataEscapedState.
    739             HTML_RECONSUME_IN(ScriptDataEscapedState);
    740     }
    741     END_STATE()
    742 
    743     HTML_BEGIN_STATE(ScriptDataDoubleEscapedState) {
                // Buffers every character except end-of-file. '-' and '<' also
                // hand off to the dash and less-than-sign states respectively.
    744         if (cc == '-') {
    745             bufferCharacter(cc);
    746             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashState);
    747         } else if (cc == '<') {
    748             bufferCharacter(cc);
    749             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    750         } else if (cc == kEndOfFileMarker) {
                    // End of input: report a parse error and reconsume in DataState.
    751             parseError();
    752             HTML_RECONSUME_IN(DataState);
    753         } else {
    754             bufferCharacter(cc);
    755             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    756         }
    757     }
    758     END_STATE()
    759 
    760     HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
                // Handles the character following a single '-'. A second '-'
                // moves on to the dash-dash state; '<' goes to the less-than-sign
                // state; any other non-EOF character is buffered and returns to
                // ScriptDataDoubleEscapedState.
    761         if (cc == '-') {
    762             bufferCharacter(cc);
    763             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
    764         } else if (cc == '<') {
    765             bufferCharacter(cc);
    766             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    767         } else if (cc == kEndOfFileMarker) {
                    // End of input: report a parse error and reconsume in DataState.
    768             parseError();
    769             HTML_RECONSUME_IN(DataState);
    770         } else {
    771             bufferCharacter(cc);
    772             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    773         }
    774     }
    775     END_STATE()
    776 
    777     HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
                // Handles the character following "--". Further '-' characters
                // keep the tokenizer in this state; '>' is buffered and advances
                // to ScriptDataState.
    778         if (cc == '-') {
    779             bufferCharacter(cc);
    780             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
    781         } else if (cc == '<') {
    782             bufferCharacter(cc);
    783             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    784         } else if (cc == '>') {
    785             bufferCharacter(cc);
    786             HTML_ADVANCE_TO(ScriptDataState);
    787         } else if (cc == kEndOfFileMarker) {
                    // End of input: report a parse error and reconsume in DataState.
    788             parseError();
    789             HTML_RECONSUME_IN(DataState);
    790         } else {
                    // Any other character is buffered and returns to ScriptDataDoubleEscapedState.
    791             bufferCharacter(cc);
    792             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    793         }
    794     }
    795     END_STATE()
    796 
    797     HTML_BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
                // Handles the character following '<'. Only '/' is consumed here;
                // everything else is reconsumed in ScriptDataDoubleEscapedState.
    798         if (cc == '/') {
    799             bufferCharacter(cc);
                    // Start with an empty comparison buffer for the name that
                    // ScriptDataDoubleEscapeEndState collects.
    800             m_temporaryBuffer.clear();
    801             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    802         } else
    803             HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
    804     }
    805     END_STATE()
    806 
    807     HTML_BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
                // Collects a lower-cased run of ASCII letters in m_temporaryBuffer.
                // On a whitespace, '/' or '>' terminator, a name equal to the
                // script tag's local name advances to ScriptDataEscapedState;
                // any other name advances to ScriptDataDoubleEscapedState.
    808         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
    809             bufferCharacter(cc);
    810             if (temporaryBufferIs(scriptTag.localName()))
    811                 HTML_ADVANCE_TO(ScriptDataEscapedState);
    812             else
    813                 HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    814         } else if (isASCIIUpper(cc)) {
                    // The character is buffered as written; only the comparison
                    // buffer receives the lower-cased form.
    815             bufferCharacter(cc);
    816             m_temporaryBuffer.append(toLowerCase(cc));
    817             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    818         } else if (isASCIILower(cc)) {
    819             bufferCharacter(cc);
    820             m_temporaryBuffer.append(static_cast<LChar>(cc));
    821             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    822         } else
                    // Any other character is not buffered here and is reconsumed
                    // in ScriptDataDoubleEscapedState.
    823             HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
    824     }
    825     END_STATE()
    826 
    827     HTML_BEGIN_STATE(BeforeAttributeNameState) {
                // Skips whitespace before an attribute. '/' and '>' end the
                // attribute list; any other non-EOF character starts a new
                // attribute whose name begins with that character.
    828         if (isTokenizerWhitespace(cc))
    829             HTML_ADVANCE_TO(BeforeAttributeNameState);
    830         else if (cc == '/')
    831             HTML_ADVANCE_TO(SelfClosingStartTagState);
    832         else if (cc == '>')
    833             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    834         else if (isASCIIUpper(cc)) {
                    // Upper-case ASCII letters are stored lower-cased in the name.
    835             m_token->addNewAttribute();
    836             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    837             m_token->appendToAttributeName(toLowerCase(cc));
    838             HTML_ADVANCE_TO(AttributeNameState);
    839         } else if (cc == kEndOfFileMarker) {
    840             parseError();
    841             HTML_RECONSUME_IN(DataState);
    842         } else {
                    // Quotes, '<' and '=' are flagged as parse errors but still
                    // become the first character of the attribute name.
    843             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
    844                 parseError();
    845             m_token->addNewAttribute();
    846             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    847             m_token->appendToAttributeName(cc);
    848             HTML_ADVANCE_TO(AttributeNameState);
    849         }
    850     }
    851     END_STATE()
    852 
    853     HTML_BEGIN_STATE(AttributeNameState) {
                // Appends characters to the current attribute name until a
                // terminator is seen. Whitespace, '/', '=', '>' and end-of-file
                // all record the end offset of the name before leaving this state.
    854         if (isTokenizerWhitespace(cc)) {
    855             m_token->endAttributeName(source.numberOfCharactersConsumed());
    856             HTML_ADVANCE_TO(AfterAttributeNameState);
    857         } else if (cc == '/') {
    858             m_token->endAttributeName(source.numberOfCharactersConsumed());
    859             HTML_ADVANCE_TO(SelfClosingStartTagState);
    860         } else if (cc == '=') {
    861             m_token->endAttributeName(source.numberOfCharactersConsumed());
    862             HTML_ADVANCE_TO(BeforeAttributeValueState);
    863         } else if (cc == '>') {
    864             m_token->endAttributeName(source.numberOfCharactersConsumed());
    865             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    866         } else if (isASCIIUpper(cc)) {
                    // Upper-case ASCII letters are stored lower-cased in the name.
    867             m_token->appendToAttributeName(toLowerCase(cc));
    868             HTML_ADVANCE_TO(AttributeNameState);
    869         } else if (cc == kEndOfFileMarker) {
    870             parseError();
    871             m_token->endAttributeName(source.numberOfCharactersConsumed());
    872             HTML_RECONSUME_IN(DataState);
    873         } else {
                    // '=' is consumed by an earlier branch of this chain, so only
                    // quotes and '<' can reach this point and need flagging. They
                    // are still appended to the name.
    874             if (cc == '"' || cc == '\'' || cc == '<')
    875                 parseError();
    876             m_token->appendToAttributeName(cc);
    877             HTML_ADVANCE_TO(AttributeNameState);
    878         }
    879     }
    880     END_STATE()
    881 
    882     HTML_BEGIN_STATE(AfterAttributeNameState) {
                // Handles what follows an attribute name and trailing whitespace:
                // '=' introduces a value, '/' and '>' end the attribute list, and
                // any other non-EOF character starts a new attribute.
    883         if (isTokenizerWhitespace(cc))
    884             HTML_ADVANCE_TO(AfterAttributeNameState);
    885         else if (cc == '/')
    886             HTML_ADVANCE_TO(SelfClosingStartTagState);
    887         else if (cc == '=')
    888             HTML_ADVANCE_TO(BeforeAttributeValueState);
    889         else if (cc == '>')
    890             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    891         else if (isASCIIUpper(cc)) {
                    // Upper-case ASCII letters are stored lower-cased in the name.
    892             m_token->addNewAttribute();
    893             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    894             m_token->appendToAttributeName(toLowerCase(cc));
    895             HTML_ADVANCE_TO(AttributeNameState);
    896         } else if (cc == kEndOfFileMarker) {
    897             parseError();
    898             HTML_RECONSUME_IN(DataState);
    899         } else {
                    // Quotes and '<' are flagged as parse errors but still become
                    // the first character of the new attribute name.
    900             if (cc == '"' || cc == '\'' || cc == '<')
    901                 parseError();
    902             m_token->addNewAttribute();
    903             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    904             m_token->appendToAttributeName(cc);
    905             HTML_ADVANCE_TO(AttributeNameState);
    906         }
    907     }
    908     END_STATE()
    909 
    910     HTML_BEGIN_STATE(BeforeAttributeValueState) {
                // Skips whitespace after '=' and selects the quoted or unquoted
                // attribute value state based on the first significant character.
    911         if (isTokenizerWhitespace(cc))
    912             HTML_ADVANCE_TO(BeforeAttributeValueState);
    913         else if (cc == '"') {
                    // The recorded start offset is one past the current count,
                    // presumably so the opening quote is excluded -- TODO confirm.
    914             m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    915             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
    916         } else if (cc == '&') {
                    // '&' is not consumed here; it is reconsumed as the first
                    // character of an unquoted value.
    917             m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    918             HTML_RECONSUME_IN(AttributeValueUnquotedState);
    919         } else if (cc == '\'') {
    920             m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    921             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
    922         } else if (cc == '>') {
                    // A tag that closes before any value is a parse error; the
                    // token is emitted anyway.
    923             parseError();
    924             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    925         } else if (cc == kEndOfFileMarker) {
    926             parseError();
    927             HTML_RECONSUME_IN(DataState);
    928         } else {
                    // '<', '=' and '`' are flagged as parse errors but still
                    // become the first character of an unquoted value.
    929             if (cc == '<' || cc == '=' || cc == '`')
    930                 parseError();
    931             m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    932             m_token->appendToAttributeValue(cc);
    933             HTML_ADVANCE_TO(AttributeValueUnquotedState);
    934         }
    935     }
    936     END_STATE()
    937 
    938     HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
                // Appends characters to the attribute value until the closing '"'.
    939         if (cc == '"') {
    940             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    941             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
    942         } else if (cc == '&') {
                    // Record the quote character so that
                    // CharacterReferenceInAttributeValueState can return here.
    943             m_additionalAllowedCharacter = '"';
    944             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    945         } else if (cc == kEndOfFileMarker) {
                    // End of input inside the value: report a parse error, close
                    // the value at the current offset and reconsume in DataState.
    946             parseError();
    947             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    948             HTML_RECONSUME_IN(DataState);
    949         } else {
    950             m_token->appendToAttributeValue(cc);
    951             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
    952         }
    953     }
    954     END_STATE()
    955 
    956     HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
                // Appends characters to the attribute value until the closing '\''.
    957         if (cc == '\'') {
    958             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    959             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
    960         } else if (cc == '&') {
                    // Record the quote character so that
                    // CharacterReferenceInAttributeValueState can return here.
    961             m_additionalAllowedCharacter = '\'';
    962             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    963         } else if (cc == kEndOfFileMarker) {
                    // End of input inside the value: report a parse error, close
                    // the value at the current offset and reconsume in DataState.
    964             parseError();
    965             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    966             HTML_RECONSUME_IN(DataState);
    967         } else {
    968             m_token->appendToAttributeValue(cc);
    969             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
    970         }
    971     }
    972     END_STATE()
    973 
    974     HTML_BEGIN_STATE(AttributeValueUnquotedState) {
                // Appends characters to an unquoted attribute value. Whitespace
                // ends the value and returns to BeforeAttributeNameState; '>'
                // ends the value and emits the token.
    975         if (isTokenizerWhitespace(cc)) {
    976             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    977             HTML_ADVANCE_TO(BeforeAttributeNameState);
    978         } else if (cc == '&') {
                    // '>' marks the unquoted case so that
                    // CharacterReferenceInAttributeValueState can return here.
    979             m_additionalAllowedCharacter = '>';
    980             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    981         } else if (cc == '>') {
    982             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    983             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    984         } else if (cc == kEndOfFileMarker) {
    985             parseError();
    986             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    987             HTML_RECONSUME_IN(DataState);
    988         } else {
                    // Quotes, '<', '=' and '`' are flagged as parse errors but
                    // are still appended to the value.
    989             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
    990                 parseError();
    991             m_token->appendToAttributeValue(cc);
    992             HTML_ADVANCE_TO(AttributeValueUnquotedState);
    993         }
    994     }
    995     END_STATE()
    996 
    997     HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
                // Tries to decode a character reference inside an attribute value
                // and appends either the decoded characters or a literal '&'.
    998         bool notEnoughCharacters = false;
    999         DecodedHTMLEntity decodedEntity;
   1000         bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
                // If the entity parser reports insufficient input, return without
                // appending anything to the attribute value.
   1001         if (notEnoughCharacters)
   1002             return haveBufferedCharacterToken();
   1003         if (!success) {
                    // No entity was recognised: keep the ampersand as literal text.
   1004             ASSERT(decodedEntity.isEmpty());
   1005             m_token->appendToAttributeValue('&');
   1006         } else {
   1007             for (unsigned i = 0; i < decodedEntity.length; ++i)
   1008                 m_token->appendToAttributeValue(decodedEntity.data[i]);
   1009         }
   1010         // We're supposed to switch back to the attribute value state that
   1011         // we were in when we were switched into this state. Rather than
   1012         // keeping track of this explicitly, we observe that the previous
   1013         // state can be determined by m_additionalAllowedCharacter.
   1014         if (m_additionalAllowedCharacter == '"')
   1015             HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
   1016         else if (m_additionalAllowedCharacter == '\'')
   1017             HTML_SWITCH_TO(AttributeValueSingleQuotedState);
   1018         else if (m_additionalAllowedCharacter == '>')
   1019             HTML_SWITCH_TO(AttributeValueUnquotedState);
   1020         else
   1021             ASSERT_NOT_REACHED();
   1022     }
   1023     END_STATE()
   1024 
   1025     HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
                // Handles the character right after a closing quote. Whitespace,
                // '/' and '>' are the accepted continuations; anything else is a
                // parse error.
   1026         if (isTokenizerWhitespace(cc))
   1027             HTML_ADVANCE_TO(BeforeAttributeNameState);
   1028         else if (cc == '/')
   1029             HTML_ADVANCE_TO(SelfClosingStartTagState);
   1030         else if (cc == '>')
   1031             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1032         else if (cc == kEndOfFileMarker) {
   1033             parseError();
   1034             HTML_RECONSUME_IN(DataState);
   1035         } else {
                    // The unexpected character is not dropped: it is reconsumed
                    // in BeforeAttributeNameState.
   1036             parseError();
   1037             HTML_RECONSUME_IN(BeforeAttributeNameState);
   1038         }
   1039     }
   1040     END_STATE()
   1041 
   1042     HTML_BEGIN_STATE(SelfClosingStartTagState) {
                // Handles the character after '/' inside a tag. Only '>' marks
                // the token as self-closing; anything else is a parse error.
   1043         if (cc == '>') {
   1044             m_token->setSelfClosing();
   1045             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1046         } else if (cc == kEndOfFileMarker) {
   1047             parseError();
   1048             HTML_RECONSUME_IN(DataState);
   1049         } else {
                    // The unexpected character is reconsumed in BeforeAttributeNameState.
   1050             parseError();
   1051             HTML_RECONSUME_IN(BeforeAttributeNameState);
   1052         }
   1053     }
   1054     END_STATE()
   1055 
   1056     HTML_BEGIN_STATE(BogusCommentState) {
                // Starts a comment token, then reconsumes the current character
                // in ContinueBogusCommentState, which collects the comment text.
   1057         m_token->beginComment();
   1058         HTML_RECONSUME_IN(ContinueBogusCommentState);
   1059     }
   1060     END_STATE()
   1061 
   1062     HTML_BEGIN_STATE(ContinueBogusCommentState) {
                // Appends every character to the comment until '>' or end-of-file,
                // either of which emits the comment token. No parse error is
                // reported in this state.
   1063         if (cc == '>')
   1064             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1065         else if (cc == kEndOfFileMarker)
   1066             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1067         else {
   1068             m_token->appendToComment(cc);
   1069             HTML_ADVANCE_TO(ContinueBogusCommentState);
   1070         }
   1071     }
   1072     END_STATE()
   1073 
   1074     HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
                // Uses lookahead to recognise "--", "doctype" (case-insensitive)
                // or, only when shouldAllowCDATA() is true, "[CDATA[". If the
                // lookahead reports NotEnoughCharacters the function returns
                // haveBufferedCharacterToken() without deciding. Input that
                // matches none of these is a parse error handled as a bogus comment.
                // NOTE(review): the matched branches appear to rely on
                // HTML_SWITCH_TO transferring control out of this block;
                // otherwise they would fall through to the parseError() below.
                // Confirm against the macro definition.
   1075         DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
   1076         DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
   1077         DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA["));
   1078         if (cc == '-') {
   1079             SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
   1080             if (result == SegmentedString::DidMatch) {
   1081                 source.advanceAndASSERT('-');
   1082                 source.advanceAndASSERT('-');
   1083                 m_token->beginComment();
   1084                 HTML_SWITCH_TO(CommentStartState);
   1085             } else if (result == SegmentedString::NotEnoughCharacters)
   1086                 return haveBufferedCharacterToken();
   1087         } else if (cc == 'D' || cc == 'd') {
   1088             SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
   1089             if (result == SegmentedString::DidMatch) {
   1090                 advanceStringAndASSERTIgnoringCase(source, "doctype");
   1091                 HTML_SWITCH_TO(DOCTYPEState);
   1092             } else if (result == SegmentedString::NotEnoughCharacters)
   1093                 return haveBufferedCharacterToken();
   1094         } else if (cc == '[' && shouldAllowCDATA()) {
   1095             SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
   1096             if (result == SegmentedString::DidMatch) {
   1097                 advanceStringAndASSERT(source, "[CDATA[");
   1098                 HTML_SWITCH_TO(CDATASectionState);
   1099             } else if (result == SegmentedString::NotEnoughCharacters)
   1100                 return haveBufferedCharacterToken();
   1101         }
                // Reached when no keyword matched.
   1102         parseError();
   1103         HTML_RECONSUME_IN(BogusCommentState);
   1104     }
   1105     END_STATE()
   1106 
   1107     HTML_BEGIN_STATE(CommentStartState) {
                // Handles the first character of a comment. '>' and end-of-file
                // are parse errors that emit the comment token as it stands.
   1108         if (cc == '-')
   1109             HTML_ADVANCE_TO(CommentStartDashState);
   1110         else if (cc == '>') {
   1111             parseError();
   1112             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1113         } else if (cc == kEndOfFileMarker) {
   1114             parseError();
   1115             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1116         } else {
   1117             m_token->appendToComment(cc);
   1118             HTML_ADVANCE_TO(CommentState);
   1119         }
   1120     }
   1121     END_STATE()
   1122 
   1123     HTML_BEGIN_STATE(CommentStartDashState) {
                // Handles the character after a '-' at the very start of a
                // comment. That '-' has not been appended to the comment yet.
   1124         if (cc == '-')
   1125             HTML_ADVANCE_TO(CommentEndState);
   1126         else if (cc == '>') {
   1127             parseError();
   1128             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1129         } else if (cc == kEndOfFileMarker) {
   1130             parseError();
   1131             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1132         } else {
                    // The pending '-' turned out to be comment text: append it
                    // ahead of the current character.
   1133             m_token->appendToComment('-');
   1134             m_token->appendToComment(cc);
   1135             HTML_ADVANCE_TO(CommentState);
   1136         }
   1137     }
   1138     END_STATE()
   1139 
   1140     HTML_BEGIN_STATE(CommentState) {
                // Appends characters to the comment. A '-' is not appended here;
                // it moves to CommentEndDashState, which decides how to treat it.
   1141         if (cc == '-')
   1142             HTML_ADVANCE_TO(CommentEndDashState);
   1143         else if (cc == kEndOfFileMarker) {
                    // End of input inside a comment: parse error, emit what has been collected.
   1144             parseError();
   1145             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1146         } else {
   1147             m_token->appendToComment(cc);
   1148             HTML_ADVANCE_TO(CommentState);
   1149         }
   1150     }
   1151     END_STATE()
   1152 
   1153     HTML_BEGIN_STATE(CommentEndDashState) {
                // Handles the character after a single '-' inside a comment.
                // That '-' has not been appended to the comment yet.
   1154         if (cc == '-')
   1155             HTML_ADVANCE_TO(CommentEndState);
   1156         else if (cc == kEndOfFileMarker) {
   1157             parseError();
   1158             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1159         } else {
                    // The pending '-' was ordinary text: append it ahead of the
                    // current character and continue in CommentState.
   1160             m_token->appendToComment('-');
   1161             m_token->appendToComment(cc);
   1162             HTML_ADVANCE_TO(CommentState);
   1163         }
   1164     }
   1165     END_STATE()
   1166 
   1167     HTML_BEGIN_STATE(CommentEndState) {
                // Handles the character after "--" inside a comment. The two
                // dashes have not been appended to the comment yet. Only '>'
                // ends the comment without a parse error.
   1168         if (cc == '>')
   1169             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1170         else if (cc == '!') {
   1171             parseError();
   1172             HTML_ADVANCE_TO(CommentEndBangState);
   1173         } else if (cc == '-') {
                    // A further '-' adds one dash to the comment text and stays
                    // in this state.
   1174             parseError();
   1175             m_token->appendToComment('-');
   1176             HTML_ADVANCE_TO(CommentEndState);
   1177         } else if (cc == kEndOfFileMarker) {
   1178             parseError();
   1179             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1180         } else {
                    // The pending "--" was ordinary text: append both dashes and
                    // the current character, then continue in CommentState.
   1181             parseError();
   1182             m_token->appendToComment('-');
   1183             m_token->appendToComment('-');
   1184             m_token->appendToComment(cc);
   1185             HTML_ADVANCE_TO(CommentState);
   1186         }
   1187     }
   1188     END_STATE()
   1189 
   1190     HTML_BEGIN_STATE(CommentEndBangState) {
                // Handles the character after "--!" inside a comment. Those three
                // characters have not been appended to the comment yet.
   1191         if (cc == '-') {
                    // "--!" becomes comment text; the new '-' is left pending for
                    // CommentEndDashState.
   1192             m_token->appendToComment('-');
   1193             m_token->appendToComment('-');
   1194             m_token->appendToComment('!');
   1195             HTML_ADVANCE_TO(CommentEndDashState);
   1196         } else if (cc == '>')
   1197             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1198         else if (cc == kEndOfFileMarker) {
   1199             parseError();
   1200             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1201         } else {
                    // "--!" and the current character all become comment text.
   1202             m_token->appendToComment('-');
   1203             m_token->appendToComment('-');
   1204             m_token->appendToComment('!');
   1205             m_token->appendToComment(cc);
   1206             HTML_ADVANCE_TO(CommentState);
   1207         }
   1208     }
   1209     END_STATE()
   1210 
   1211     HTML_BEGIN_STATE(DOCTYPEState) {
                // Handles the character after the "doctype" keyword. Whitespace
                // is the only continuation that is not a parse error.
   1212         if (isTokenizerWhitespace(cc))
   1213             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
   1214         else if (cc == kEndOfFileMarker) {
                    // End of input: emit a DOCTYPE token flagged force-quirks.
   1215             parseError();
   1216             m_token->beginDOCTYPE();
   1217             m_token->setForceQuirks();
   1218             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1219         } else {
                    // Missing whitespace is reported, then the character is
                    // reconsumed in BeforeDOCTYPENameState.
   1220             parseError();
   1221             HTML_RECONSUME_IN(BeforeDOCTYPENameState);
   1222         }
   1223     }
   1224     END_STATE()
   1225 
   1226     HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
                // Skips whitespace, then begins the DOCTYPE token with the first
                // name character. '>' and end-of-file begin a DOCTYPE token
                // without a name character and flag it force-quirks.
   1227         if (isTokenizerWhitespace(cc))
   1228             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
   1229         else if (isASCIIUpper(cc)) {
                    // Upper-case ASCII letters are stored lower-cased in the name.
   1230             m_token->beginDOCTYPE(toLowerCase(cc));
   1231             HTML_ADVANCE_TO(DOCTYPENameState);
   1232         } else if (cc == '>') {
   1233             parseError();
   1234             m_token->beginDOCTYPE();
   1235             m_token->setForceQuirks();
   1236             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1237         } else if (cc == kEndOfFileMarker) {
   1238             parseError();
   1239             m_token->beginDOCTYPE();
   1240             m_token->setForceQuirks();
   1241             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1242         } else {
   1243             m_token->beginDOCTYPE(cc);
   1244             HTML_ADVANCE_TO(DOCTYPENameState);
   1245         }
   1246     }
   1247     END_STATE()
   1248 
   1249     HTML_BEGIN_STATE(DOCTYPENameState) {
                // Appends characters to the DOCTYPE name until whitespace or '>'.
   1250         if (isTokenizerWhitespace(cc))
   1251             HTML_ADVANCE_TO(AfterDOCTYPENameState);
   1252         else if (cc == '>')
   1253             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1254         else if (isASCIIUpper(cc)) {
                    // Upper-case ASCII letters are stored lower-cased in the name.
   1255             m_token->appendToName(toLowerCase(cc));
   1256             HTML_ADVANCE_TO(DOCTYPENameState);
   1257         } else if (cc == kEndOfFileMarker) {
                    // End of input inside the name: parse error, flag force-quirks and emit.
   1258             parseError();
   1259             m_token->setForceQuirks();
   1260             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1261         } else {
   1262             m_token->appendToName(cc);
   1263             HTML_ADVANCE_TO(DOCTYPENameState);
   1264         }
   1265     }
   1266     END_STATE()
   1267 
   1268     HTML_BEGIN_STATE(AfterDOCTYPENameState) {
                // Handles what follows the DOCTYPE name: skips whitespace, emits
                // on '>', and uses case-insensitive lookahead to recognise the
                // "public" and "system" keywords. Anything else is a parse error
                // that flags force-quirks and moves to BogusDOCTYPEState.
   1269         if (isTokenizerWhitespace(cc))
   1270             HTML_ADVANCE_TO(AfterDOCTYPENameState);
                // Chained with else, like the sibling states, so the whitespace
                // branch can never reach the tests below regardless of how
                // HTML_ADVANCE_TO transfers control.
   1271         else if (cc == '>')
   1272             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1273         else if (cc == kEndOfFileMarker) {
   1274             parseError();
   1275             m_token->setForceQuirks();
   1276             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1277         } else {
   1278             DEFINE_STATIC_LOCAL(String, publicString, ("public"));
   1279             DEFINE_STATIC_LOCAL(String, systemString, ("system"));
   1280             if (cc == 'P' || cc == 'p') {
   1281                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
   1282                 if (result == SegmentedString::DidMatch) {
   1283                     advanceStringAndASSERTIgnoringCase(source, "public");
   1284                     HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
   1285                 } else if (result == SegmentedString::NotEnoughCharacters)
                            // Not enough input to decide yet: return without
                            // changing state.
   1286                     return haveBufferedCharacterToken();
   1287             } else if (cc == 'S' || cc == 's') {
   1288                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
   1289                 if (result == SegmentedString::DidMatch) {
   1290                     advanceStringAndASSERTIgnoringCase(source, "system");
   1291                     HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
   1292                 } else if (result == SegmentedString::NotEnoughCharacters)
   1293                     return haveBufferedCharacterToken();
   1294             }
                    // Reached when neither keyword matched.
                    // NOTE(review): the matched branches appear to rely on
                    // HTML_SWITCH_TO leaving this block; confirm against the
                    // macro definition.
   1295             parseError();
   1296             m_token->setForceQuirks();
   1297             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1298         }
   1299     }
   1300     END_STATE()
   1301 
   1302     HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
                // Handles the character right after the "public" keyword.
                // Whitespace is the only continuation that is not a parse error.
   1303         if (isTokenizerWhitespace(cc))
   1304             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
   1305         else if (cc == '"') {
                    // A quote directly after the keyword is a parse error, but
                    // the public identifier is still started (as empty).
   1306             parseError();
   1307             m_token->setPublicIdentifierToEmptyString();
   1308             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1309         } else if (cc == '\'') {
   1310             parseError();
   1311             m_token->setPublicIdentifierToEmptyString();
   1312             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1313         } else if (cc == '>') {
   1314             parseError();
   1315             m_token->setForceQuirks();
   1316             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1317         } else if (cc == kEndOfFileMarker) {
   1318             parseError();
   1319             m_token->setForceQuirks();
   1320             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1321         } else {
                    // Any other character flags force-quirks and moves to BogusDOCTYPEState.
   1322             parseError();
   1323             m_token->setForceQuirks();
   1324             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1325         }
   1326     }
   1327     END_STATE()
   1328 
   1329     HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
                // Skips whitespace after the "public" keyword; a quote then
                // starts an (initially empty) public identifier without a parse
                // error.
   1330         if (isTokenizerWhitespace(cc))
   1331             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
   1332         else if (cc == '"') {
   1333             m_token->setPublicIdentifierToEmptyString();
   1334             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1335         } else if (cc == '\'') {
   1336             m_token->setPublicIdentifierToEmptyString();
   1337             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1338         } else if (cc == '>') {
                    // No identifier before '>': parse error, flag force-quirks and emit.
   1339             parseError();
   1340             m_token->setForceQuirks();
   1341             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1342         } else if (cc == kEndOfFileMarker) {
   1343             parseError();
   1344             m_token->setForceQuirks();
   1345             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1346         } else {
                    // Any other character flags force-quirks and moves to BogusDOCTYPEState.
   1347             parseError();
   1348             m_token->setForceQuirks();
   1349             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1350         }
   1351     }
   1352     END_STATE()
   1353 
   1354     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
                // Appends characters to the public identifier until the closing '"'.
   1355         if (cc == '"')
   1356             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
   1357         else if (cc == '>') {
                    // '>' before the closing quote: parse error, flag force-quirks and emit.
   1358             parseError();
   1359             m_token->setForceQuirks();
   1360             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1361         } else if (cc == kEndOfFileMarker) {
   1362             parseError();
   1363             m_token->setForceQuirks();
   1364             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1365         } else {
   1366             m_token->appendToPublicIdentifier(cc);
   1367             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1368         }
   1369     }
   1370     END_STATE()
   1371 
   1372     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
                // Appends characters to the public identifier until the closing '\''.
   1373         if (cc == '\'')
   1374             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
   1375         else if (cc == '>') {
                    // '>' before the closing quote: parse error, flag force-quirks and emit.
   1376             parseError();
   1377             m_token->setForceQuirks();
   1378             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1379         } else if (cc == kEndOfFileMarker) {
   1380             parseError();
   1381             m_token->setForceQuirks();
   1382             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1383         } else {
   1384             m_token->appendToPublicIdentifier(cc);
   1385             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1386         }
   1387     }
   1388     END_STATE()
   1389 
   1390     HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
                // Handles the character right after the public identifier's
                // closing quote. Whitespace and '>' are accepted without a parse
                // error.
   1391         if (isTokenizerWhitespace(cc))
   1392             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
   1393         else if (cc == '>')
   1394             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1395         else if (cc == '"') {
                    // A system identifier that starts without separating
                    // whitespace is a parse error, but it is still started (as empty).
   1396             parseError();
   1397             m_token->setSystemIdentifierToEmptyString();
   1398             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1399         } else if (cc == '\'') {
   1400             parseError();
   1401             m_token->setSystemIdentifierToEmptyString();
   1402             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1403         } else if (cc == kEndOfFileMarker) {
   1404             parseError();
   1405             m_token->setForceQuirks();
   1406             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1407         } else {
                    // Any other character flags force-quirks and moves to BogusDOCTYPEState.
   1408             parseError();
   1409             m_token->setForceQuirks();
   1410             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1411         }
   1412     }
   1413     END_STATE()
   1414 
   1415     HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
   1416         if (isTokenizerWhitespace(cc))
   1417             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
   1418         else if (cc == '>')
   1419             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1420         else if (cc == '"') {
   1421             m_token->setSystemIdentifierToEmptyString();
   1422             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1423         } else if (cc == '\'') {
   1424             m_token->setSystemIdentifierToEmptyString();
   1425             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1426         } else if (cc == kEndOfFileMarker) {
   1427             parseError();
   1428             m_token->setForceQuirks();
   1429             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1430         } else {
   1431             parseError();
   1432             m_token->setForceQuirks();
   1433             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1434         }
   1435     }
   1436     END_STATE()
   1437 
   1438     HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
   1439         if (isTokenizerWhitespace(cc))
   1440             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
   1441         else if (cc == '"') {
   1442             parseError();
   1443             m_token->setSystemIdentifierToEmptyString();
   1444             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1445         } else if (cc == '\'') {
   1446             parseError();
   1447             m_token->setSystemIdentifierToEmptyString();
   1448             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1449         } else if (cc == '>') {
   1450             parseError();
   1451             m_token->setForceQuirks();
   1452             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1453         } else if (cc == kEndOfFileMarker) {
   1454             parseError();
   1455             m_token->setForceQuirks();
   1456             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1457         } else {
   1458             parseError();
   1459             m_token->setForceQuirks();
   1460             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1461         }
   1462     }
   1463     END_STATE()
   1464 
   1465     HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
   1466         if (isTokenizerWhitespace(cc))
   1467             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
   1468         if (cc == '"') {
   1469             m_token->setSystemIdentifierToEmptyString();
   1470             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1471         } else if (cc == '\'') {
   1472             m_token->setSystemIdentifierToEmptyString();
   1473             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1474         } else if (cc == '>') {
   1475             parseError();
   1476             m_token->setForceQuirks();
   1477             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1478         } else if (cc == kEndOfFileMarker) {
   1479             parseError();
   1480             m_token->setForceQuirks();
   1481             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1482         } else {
   1483             parseError();
   1484             m_token->setForceQuirks();
   1485             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1486         }
   1487     }
   1488     END_STATE()
   1489 
   1490     HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
   1491         if (cc == '"')
   1492             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1493         else if (cc == '>') {
   1494             parseError();
   1495             m_token->setForceQuirks();
   1496             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1497         } else if (cc == kEndOfFileMarker) {
   1498             parseError();
   1499             m_token->setForceQuirks();
   1500             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1501         } else {
   1502             m_token->appendToSystemIdentifier(cc);
   1503             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1504         }
   1505     }
   1506     END_STATE()
   1507 
   1508     HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
   1509         if (cc == '\'')
   1510             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1511         else if (cc == '>') {
   1512             parseError();
   1513             m_token->setForceQuirks();
   1514             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1515         } else if (cc == kEndOfFileMarker) {
   1516             parseError();
   1517             m_token->setForceQuirks();
   1518             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1519         } else {
   1520             m_token->appendToSystemIdentifier(cc);
   1521             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1522         }
   1523     }
   1524     END_STATE()
   1525 
   1526     HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
   1527         if (isTokenizerWhitespace(cc))
   1528             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1529         else if (cc == '>')
   1530             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1531         else if (cc == kEndOfFileMarker) {
   1532             parseError();
   1533             m_token->setForceQuirks();
   1534             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1535         } else {
   1536             parseError();
   1537             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1538         }
   1539     }
   1540     END_STATE()
   1541 
   1542     HTML_BEGIN_STATE(BogusDOCTYPEState) {
   1543         if (cc == '>')
   1544             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1545         else if (cc == kEndOfFileMarker)
   1546             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1547         HTML_ADVANCE_TO(BogusDOCTYPEState);
   1548     }
   1549     END_STATE()
   1550 
   1551     HTML_BEGIN_STATE(CDATASectionState) {
   1552         if (cc == ']')
   1553             HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
   1554         else if (cc == kEndOfFileMarker)
   1555             HTML_RECONSUME_IN(DataState);
   1556         else {
   1557             bufferCharacter(cc);
   1558             HTML_ADVANCE_TO(CDATASectionState);
   1559         }
   1560     }
   1561     END_STATE()
   1562 
   1563     HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
   1564         if (cc == ']')
   1565             HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
   1566         else {
   1567             bufferCharacter(']');
   1568             HTML_RECONSUME_IN(CDATASectionState);
   1569         }
   1570     }
   1571 
   1572     HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
   1573         if (cc == '>')
   1574             HTML_ADVANCE_TO(DataState);
   1575         else {
   1576             bufferCharacter(']');
   1577             bufferCharacter(']');
   1578             HTML_RECONSUME_IN(CDATASectionState);
   1579         }
   1580     }
   1581     END_STATE()
   1582 
   1583     }
   1584 
   1585     ASSERT_NOT_REACHED();
   1586     return false;
   1587 }
   1588 
   1589 String HTMLTokenizer::bufferedCharacters() const
   1590 {
   1591     // FIXME: Add an assert about m_state.
   1592     StringBuilder characters;
   1593     characters.reserveCapacity(numberOfBufferedCharacters());
   1594     characters.append('<');
   1595     characters.append('/');
   1596     characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size());
   1597     return characters.toString();
   1598 }
   1599 
   1600 void HTMLTokenizer::updateStateFor(const AtomicString& tagName)
   1601 {
   1602     if (tagName == textareaTag || tagName == titleTag)
   1603         setState(HTMLTokenizer::RCDATAState);
   1604     else if (tagName == plaintextTag)
   1605         setState(HTMLTokenizer::PLAINTEXTState);
   1606     else if (tagName == scriptTag)
   1607         setState(HTMLTokenizer::ScriptDataState);
   1608     else if (tagName == styleTag
   1609         || tagName == iframeTag
   1610         || tagName == xmpTag
   1611         || (tagName == noembedTag && m_options.pluginsEnabled)
   1612         || tagName == noframesTag
   1613         || (tagName == noscriptTag && m_options.scriptEnabled))
   1614         setState(HTMLTokenizer::RAWTEXTState);
   1615 }
   1616 
   1617 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
   1618 {
   1619     return vectorEqualsString(m_temporaryBuffer, expectedString);
   1620 }
   1621 
   1622 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc)
   1623 {
   1624     ASSERT(isEndTagBufferingState(m_state));
   1625     m_bufferedEndTagName.append(cc);
   1626 }
   1627 
   1628 inline bool HTMLTokenizer::isAppropriateEndTag()
   1629 {
   1630     if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
   1631         return false;
   1632 
   1633     size_t numCharacters = m_bufferedEndTagName.size();
   1634 
   1635     for (size_t i = 0; i < numCharacters; i++) {
   1636         if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
   1637             return false;
   1638     }
   1639 
   1640     return true;
   1641 }
   1642 
   1643 inline void HTMLTokenizer::parseError()
   1644 {
   1645     notImplemented();
   1646 }
   1647 
   1648 }
   1649