Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
      3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
      4  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/html/parser/HTMLTokenizer.h"
     30 
     31 #include "HTMLNames.h"
     32 #include "core/html/parser/HTMLEntityParser.h"
     33 #include "core/html/parser/HTMLTreeBuilder.h"
     34 #include "platform/NotImplemented.h"
     35 #include "core/xml/parser/MarkupTokenizerInlines.h"
     36 #include "wtf/ASCIICType.h"
     37 #include "wtf/text/AtomicString.h"
     38 #include "wtf/unicode/Unicode.h"
     39 
     40 using namespace WTF;
     41 
     42 namespace WebCore {
     43 
     44 using namespace HTMLNames;
     45 
     46 // This has to go in a .cpp file, as the linker doesn't like it being included more than once.
     47 // We don't have an HTMLToken.cpp though, so this is the next best place.
     48 QualifiedName AtomicHTMLToken::nameForAttribute(const HTMLToken::Attribute& attribute) const
     49 {
     50     return QualifiedName(nullAtom, AtomicString(attribute.name), nullAtom);
     51 }
     52 
     53 bool AtomicHTMLToken::usesName() const
     54 {
     55     return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE;
     56 }
     57 
     58 bool AtomicHTMLToken::usesAttributes() const
     59 {
     60     return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
     61 }
     62 
     63 static inline UChar toLowerCase(UChar cc)
     64 {
     65     ASSERT(isASCIIUpper(cc));
     66     const int lowerCaseOffset = 0x20;
     67     return cc + lowerCaseOffset;
     68 }
     69 
     70 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string)
     71 {
     72     if (vector.size() != string.length())
     73         return false;
     74 
     75     if (!string.length())
     76         return true;
     77 
     78     return equal(string.impl(), vector.data(), vector.size());
     79 }
     80 
     81 static inline bool isEndTagBufferingState(HTMLTokenizer::State state)
     82 {
     83     switch (state) {
     84     case HTMLTokenizer::RCDATAEndTagOpenState:
     85     case HTMLTokenizer::RCDATAEndTagNameState:
     86     case HTMLTokenizer::RAWTEXTEndTagOpenState:
     87     case HTMLTokenizer::RAWTEXTEndTagNameState:
     88     case HTMLTokenizer::ScriptDataEndTagOpenState:
     89     case HTMLTokenizer::ScriptDataEndTagNameState:
     90     case HTMLTokenizer::ScriptDataEscapedEndTagOpenState:
     91     case HTMLTokenizer::ScriptDataEscapedEndTagNameState:
     92         return true;
     93     default:
     94         return false;
     95     }
     96 }
     97 
     // Shorthands that forward to BEGIN_STATE / RECONSUME_IN / ADVANCE_TO /
     // SWITCH_TO (defined outside this chunk) with HTMLTokenizer as the first
     // argument, so state handlers only have to spell the state name.
     #define HTML_BEGIN_STATE(state) BEGIN_STATE(HTMLTokenizer, state)
     #define HTML_RECONSUME_IN(state) RECONSUME_IN(HTMLTokenizer, state)
     #define HTML_ADVANCE_TO(state) ADVANCE_TO(HTMLTokenizer, state)
     #define HTML_SWITCH_TO(state) SWITCH_TO(HTMLTokenizer, state)
    102 
    103 HTMLTokenizer::HTMLTokenizer(const HTMLParserOptions& options)
    104     : m_inputStreamPreprocessor(this)
    105     , m_options(options)
    106 {
    107     reset();
    108 }
    109 
    110 HTMLTokenizer::~HTMLTokenizer()
    111 {
    112 }
    113 
    114 void HTMLTokenizer::reset()
    115 {
    116     m_state = HTMLTokenizer::DataState;
    117     m_token = 0;
    118     m_forceNullCharacterReplacement = false;
    119     m_shouldAllowCDATA = false;
    120     m_additionalAllowedCharacter = '\0';
    121 }
    122 
    123 bool HTMLTokenizer::canCreateCheckpoint() const
    124 {
    125     if (!m_appropriateEndTagName.isEmpty())
    126         return false;
    127     if (!m_temporaryBuffer.isEmpty())
    128         return false;
    129     if (!m_bufferedEndTagName.isEmpty())
    130         return false;
    131     return true;
    132 }
    133 
    134 void HTMLTokenizer::createCheckpoint(Checkpoint& result) const
    135 {
    136     ASSERT(canCreateCheckpoint());
    137     result.options = m_options;
    138     result.state = m_state;
    139     result.additionalAllowedCharacter = m_additionalAllowedCharacter;
    140     result.skipNextNewLine = m_inputStreamPreprocessor.skipNextNewLine();
    141     result.shouldAllowCDATA = m_shouldAllowCDATA;
    142 }
    143 
    144 void HTMLTokenizer::restoreFromCheckpoint(const Checkpoint& checkpoint)
    145 {
    146     m_token = 0;
    147     m_options = checkpoint.options;
    148     m_state = checkpoint.state;
    149     m_additionalAllowedCharacter = checkpoint.additionalAllowedCharacter;
    150     m_inputStreamPreprocessor.reset(checkpoint.skipNextNewLine);
    151     m_shouldAllowCDATA = checkpoint.shouldAllowCDATA;
    152 }
    153 
    154 inline bool HTMLTokenizer::processEntity(SegmentedString& source)
    155 {
    156     bool notEnoughCharacters = false;
    157     DecodedHTMLEntity decodedEntity;
    158     bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
    159     if (notEnoughCharacters)
    160         return false;
    161     if (!success) {
    162         ASSERT(decodedEntity.isEmpty());
    163         bufferCharacter('&');
    164     } else {
    165         for (unsigned i = 0; i < decodedEntity.length; ++i)
    166             bufferCharacter(decodedEntity.data[i]);
    167     }
    168     return true;
    169 }
    170 
    171 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
    172 {
    173     ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
    174     source.advanceAndUpdateLineNumber();
    175     if (m_token->type() == HTMLToken::Character)
    176         return true;
    177     m_token->beginEndTag(m_bufferedEndTagName);
    178     m_bufferedEndTagName.clear();
    179     m_appropriateEndTagName.clear();
    180     m_temporaryBuffer.clear();
    181     return false;
    182 }
    183 
    // FLUSH_AND_ADVANCE_TO(nextState): stores nextState in m_state and calls
    // flushBufferedEndTag(source). If that returns true, the enclosing function
    // returns true. Otherwise, when no further input can be peeked, the
    // enclosing function returns haveBufferedCharacterToken(); else the next
    // input character is loaded into cc and control jumps to the label named
    // nextState. Must be expanded where source, cc and that label are in scope.
    #define FLUSH_AND_ADVANCE_TO(nextState)                                       \
        do {                                                                       \
            m_state = HTMLTokenizer::nextState;                                    \
            if (flushBufferedEndTag(source))                                       \
                return true;                                                       \
            if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))       \
                return haveBufferedCharacterToken();                               \
            cc = m_inputStreamPreprocessor.nextInputCharacter();                   \
            goto nextState;                                                        \
        } while (false)
    195 
    196 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, HTMLTokenizer::State state)
    197 {
    198     m_state = state;
    199     flushBufferedEndTag(source);
    200     return true;
    201 }
    202 
    203 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
    204 {
    205     // If we have a token in progress, then we're supposed to be called back
    206     // with the same token so we can finish it.
    207     ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
    208     m_token = &token;
    209 
    210     if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
    211         // FIXME: This should call flushBufferedEndTag().
    212         // We started an end tag during our last iteration.
    213         m_token->beginEndTag(m_bufferedEndTagName);
    214         m_bufferedEndTagName.clear();
    215         m_appropriateEndTagName.clear();
    216         m_temporaryBuffer.clear();
    217         if (m_state == HTMLTokenizer::DataState) {
    218             // We're back in the data state, so we must be done with the tag.
    219             return true;
    220         }
    221     }
    222 
    223     if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
    224         return haveBufferedCharacterToken();
    225     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
    226 
    227     // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
    228     switch (m_state) {
    229     HTML_BEGIN_STATE(DataState) {
    230         if (cc == '&')
    231             HTML_ADVANCE_TO(CharacterReferenceInDataState);
    232         else if (cc == '<') {
    233             if (m_token->type() == HTMLToken::Character) {
    234                 // We have a bunch of character tokens queued up that we
    235                 // are emitting lazily here.
    236                 return true;
    237             }
    238             HTML_ADVANCE_TO(TagOpenState);
    239         } else if (cc == kEndOfFileMarker)
    240             return emitEndOfFile(source);
    241         else {
    242             bufferCharacter(cc);
    243             HTML_ADVANCE_TO(DataState);
    244         }
    245     }
    246     END_STATE()
    247 
    248     HTML_BEGIN_STATE(CharacterReferenceInDataState) {
    249         if (!processEntity(source))
    250             return haveBufferedCharacterToken();
    251         HTML_SWITCH_TO(DataState);
    252     }
    253     END_STATE()
    254 
    255     HTML_BEGIN_STATE(RCDATAState) {
    256         if (cc == '&')
    257             HTML_ADVANCE_TO(CharacterReferenceInRCDATAState);
    258         else if (cc == '<')
    259             HTML_ADVANCE_TO(RCDATALessThanSignState);
    260         else if (cc == kEndOfFileMarker)
    261             return emitEndOfFile(source);
    262         else {
    263             bufferCharacter(cc);
    264             HTML_ADVANCE_TO(RCDATAState);
    265         }
    266     }
    267     END_STATE()
    268 
    269     HTML_BEGIN_STATE(CharacterReferenceInRCDATAState) {
    270         if (!processEntity(source))
    271             return haveBufferedCharacterToken();
    272         HTML_SWITCH_TO(RCDATAState);
    273     }
    274     END_STATE()
    275 
    276     HTML_BEGIN_STATE(RAWTEXTState) {
    277         if (cc == '<')
    278             HTML_ADVANCE_TO(RAWTEXTLessThanSignState);
    279         else if (cc == kEndOfFileMarker)
    280             return emitEndOfFile(source);
    281         else {
    282             bufferCharacter(cc);
    283             HTML_ADVANCE_TO(RAWTEXTState);
    284         }
    285     }
    286     END_STATE()
    287 
    288     HTML_BEGIN_STATE(ScriptDataState) {
    289         if (cc == '<')
    290             HTML_ADVANCE_TO(ScriptDataLessThanSignState);
    291         else if (cc == kEndOfFileMarker)
    292             return emitEndOfFile(source);
    293         else {
    294             bufferCharacter(cc);
    295             HTML_ADVANCE_TO(ScriptDataState);
    296         }
    297     }
    298     END_STATE()
    299 
    300     HTML_BEGIN_STATE(PLAINTEXTState) {
    301         if (cc == kEndOfFileMarker)
    302             return emitEndOfFile(source);
    303         bufferCharacter(cc);
    304         HTML_ADVANCE_TO(PLAINTEXTState);
    305     }
    306     END_STATE()
    307 
    308     HTML_BEGIN_STATE(TagOpenState) {
    309         if (cc == '!')
    310             HTML_ADVANCE_TO(MarkupDeclarationOpenState);
    311         else if (cc == '/')
    312             HTML_ADVANCE_TO(EndTagOpenState);
    313         else if (isASCIIUpper(cc)) {
    314             m_token->beginStartTag(toLowerCase(cc));
    315             HTML_ADVANCE_TO(TagNameState);
    316         } else if (isASCIILower(cc)) {
    317             m_token->beginStartTag(cc);
    318             HTML_ADVANCE_TO(TagNameState);
    319         } else if (cc == '?') {
    320             parseError();
    321             // The spec consumes the current character before switching
    322             // to the bogus comment state, but it's easier to implement
    323             // if we reconsume the current character.
    324             HTML_RECONSUME_IN(BogusCommentState);
    325         } else {
    326             parseError();
    327             bufferCharacter('<');
    328             HTML_RECONSUME_IN(DataState);
    329         }
    330     }
    331     END_STATE()
    332 
    333     HTML_BEGIN_STATE(EndTagOpenState) {
    334         if (isASCIIUpper(cc)) {
    335             m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
    336             m_appropriateEndTagName.clear();
    337             HTML_ADVANCE_TO(TagNameState);
    338         } else if (isASCIILower(cc)) {
    339             m_token->beginEndTag(static_cast<LChar>(cc));
    340             m_appropriateEndTagName.clear();
    341             HTML_ADVANCE_TO(TagNameState);
    342         } else if (cc == '>') {
    343             parseError();
    344             HTML_ADVANCE_TO(DataState);
    345         } else if (cc == kEndOfFileMarker) {
    346             parseError();
    347             bufferCharacter('<');
    348             bufferCharacter('/');
    349             HTML_RECONSUME_IN(DataState);
    350         } else {
    351             parseError();
    352             HTML_RECONSUME_IN(BogusCommentState);
    353         }
    354     }
    355     END_STATE()
    356 
    357     HTML_BEGIN_STATE(TagNameState) {
    358         if (isTokenizerWhitespace(cc))
    359             HTML_ADVANCE_TO(BeforeAttributeNameState);
    360         else if (cc == '/')
    361             HTML_ADVANCE_TO(SelfClosingStartTagState);
    362         else if (cc == '>')
    363             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    364         else if (isASCIIUpper(cc)) {
    365             m_token->appendToName(toLowerCase(cc));
    366             HTML_ADVANCE_TO(TagNameState);
    367         } else if (cc == kEndOfFileMarker) {
    368             parseError();
    369             HTML_RECONSUME_IN(DataState);
    370         } else {
    371             m_token->appendToName(cc);
    372             HTML_ADVANCE_TO(TagNameState);
    373         }
    374     }
    375     END_STATE()
    376 
    377     HTML_BEGIN_STATE(RCDATALessThanSignState) {
    378         if (cc == '/') {
    379             m_temporaryBuffer.clear();
    380             ASSERT(m_bufferedEndTagName.isEmpty());
    381             HTML_ADVANCE_TO(RCDATAEndTagOpenState);
    382         } else {
    383             bufferCharacter('<');
    384             HTML_RECONSUME_IN(RCDATAState);
    385         }
    386     }
    387     END_STATE()
    388 
    389     HTML_BEGIN_STATE(RCDATAEndTagOpenState) {
    390         if (isASCIIUpper(cc)) {
    391             m_temporaryBuffer.append(static_cast<LChar>(cc));
    392             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    393             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    394         } else if (isASCIILower(cc)) {
    395             m_temporaryBuffer.append(static_cast<LChar>(cc));
    396             addToPossibleEndTag(static_cast<LChar>(cc));
    397             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    398         } else {
    399             bufferCharacter('<');
    400             bufferCharacter('/');
    401             HTML_RECONSUME_IN(RCDATAState);
    402         }
    403     }
    404     END_STATE()
    405 
    406     HTML_BEGIN_STATE(RCDATAEndTagNameState) {
    407         if (isASCIIUpper(cc)) {
    408             m_temporaryBuffer.append(static_cast<LChar>(cc));
    409             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    410             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    411         } else if (isASCIILower(cc)) {
    412             m_temporaryBuffer.append(static_cast<LChar>(cc));
    413             addToPossibleEndTag(static_cast<LChar>(cc));
    414             HTML_ADVANCE_TO(RCDATAEndTagNameState);
    415         } else {
    416             if (isTokenizerWhitespace(cc)) {
    417                 if (isAppropriateEndTag()) {
    418                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    419                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    420                 }
    421             } else if (cc == '/') {
    422                 if (isAppropriateEndTag()) {
    423                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    424                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    425                 }
    426             } else if (cc == '>') {
    427                 if (isAppropriateEndTag()) {
    428                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    429                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    430                 }
    431             }
    432             bufferCharacter('<');
    433             bufferCharacter('/');
    434             m_token->appendToCharacter(m_temporaryBuffer);
    435             m_bufferedEndTagName.clear();
    436             m_temporaryBuffer.clear();
    437             HTML_RECONSUME_IN(RCDATAState);
    438         }
    439     }
    440     END_STATE()
    441 
    442     HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {
    443         if (cc == '/') {
    444             m_temporaryBuffer.clear();
    445             ASSERT(m_bufferedEndTagName.isEmpty());
    446             HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);
    447         } else {
    448             bufferCharacter('<');
    449             HTML_RECONSUME_IN(RAWTEXTState);
    450         }
    451     }
    452     END_STATE()
    453 
    454     HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {
    455         if (isASCIIUpper(cc)) {
    456             m_temporaryBuffer.append(static_cast<LChar>(cc));
    457             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    458             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    459         } else if (isASCIILower(cc)) {
    460             m_temporaryBuffer.append(static_cast<LChar>(cc));
    461             addToPossibleEndTag(static_cast<LChar>(cc));
    462             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    463         } else {
    464             bufferCharacter('<');
    465             bufferCharacter('/');
    466             HTML_RECONSUME_IN(RAWTEXTState);
    467         }
    468     }
    469     END_STATE()
    470 
    471     HTML_BEGIN_STATE(RAWTEXTEndTagNameState) {
    472         if (isASCIIUpper(cc)) {
    473             m_temporaryBuffer.append(static_cast<LChar>(cc));
    474             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    475             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    476         } else if (isASCIILower(cc)) {
    477             m_temporaryBuffer.append(static_cast<LChar>(cc));
    478             addToPossibleEndTag(static_cast<LChar>(cc));
    479             HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
    480         } else {
    481             if (isTokenizerWhitespace(cc)) {
    482                 if (isAppropriateEndTag()) {
    483                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    484                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    485                 }
    486             } else if (cc == '/') {
    487                 if (isAppropriateEndTag()) {
    488                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    489                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    490                 }
    491             } else if (cc == '>') {
    492                 if (isAppropriateEndTag()) {
    493                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    494                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    495                 }
    496             }
    497             bufferCharacter('<');
    498             bufferCharacter('/');
    499             m_token->appendToCharacter(m_temporaryBuffer);
    500             m_bufferedEndTagName.clear();
    501             m_temporaryBuffer.clear();
    502             HTML_RECONSUME_IN(RAWTEXTState);
    503         }
    504     }
    505     END_STATE()
    506 
    507     HTML_BEGIN_STATE(ScriptDataLessThanSignState) {
    508         if (cc == '/') {
    509             m_temporaryBuffer.clear();
    510             ASSERT(m_bufferedEndTagName.isEmpty());
    511             HTML_ADVANCE_TO(ScriptDataEndTagOpenState);
    512         } else if (cc == '!') {
    513             bufferCharacter('<');
    514             bufferCharacter('!');
    515             HTML_ADVANCE_TO(ScriptDataEscapeStartState);
    516         } else {
    517             bufferCharacter('<');
    518             HTML_RECONSUME_IN(ScriptDataState);
    519         }
    520     }
    521     END_STATE()
    522 
    523     HTML_BEGIN_STATE(ScriptDataEndTagOpenState) {
    524         if (isASCIIUpper(cc)) {
    525             m_temporaryBuffer.append(static_cast<LChar>(cc));
    526             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    527             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    528         } else if (isASCIILower(cc)) {
    529             m_temporaryBuffer.append(static_cast<LChar>(cc));
    530             addToPossibleEndTag(static_cast<LChar>(cc));
    531             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    532         } else {
    533             bufferCharacter('<');
    534             bufferCharacter('/');
    535             HTML_RECONSUME_IN(ScriptDataState);
    536         }
    537     }
    538     END_STATE()
    539 
    540     HTML_BEGIN_STATE(ScriptDataEndTagNameState) {
    541         if (isASCIIUpper(cc)) {
    542             m_temporaryBuffer.append(static_cast<LChar>(cc));
    543             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    544             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    545         } else if (isASCIILower(cc)) {
    546             m_temporaryBuffer.append(static_cast<LChar>(cc));
    547             addToPossibleEndTag(static_cast<LChar>(cc));
    548             HTML_ADVANCE_TO(ScriptDataEndTagNameState);
    549         } else {
    550             if (isTokenizerWhitespace(cc)) {
    551                 if (isAppropriateEndTag()) {
    552                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    553                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    554                 }
    555             } else if (cc == '/') {
    556                 if (isAppropriateEndTag()) {
    557                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    558                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    559                 }
    560             } else if (cc == '>') {
    561                 if (isAppropriateEndTag()) {
    562                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    563                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    564                 }
    565             }
    566             bufferCharacter('<');
    567             bufferCharacter('/');
    568             m_token->appendToCharacter(m_temporaryBuffer);
    569             m_bufferedEndTagName.clear();
    570             m_temporaryBuffer.clear();
    571             HTML_RECONSUME_IN(ScriptDataState);
    572         }
    573     }
    574     END_STATE()
    575 
    576     HTML_BEGIN_STATE(ScriptDataEscapeStartState) {
    577         if (cc == '-') {
    578             bufferCharacter(cc);
    579             HTML_ADVANCE_TO(ScriptDataEscapeStartDashState);
    580         } else
    581             HTML_RECONSUME_IN(ScriptDataState);
    582     }
    583     END_STATE()
    584 
    585     HTML_BEGIN_STATE(ScriptDataEscapeStartDashState) {
    586         if (cc == '-') {
    587             bufferCharacter(cc);
    588             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    589         } else
    590             HTML_RECONSUME_IN(ScriptDataState);
    591     }
    592     END_STATE()
    593 
    594     HTML_BEGIN_STATE(ScriptDataEscapedState) {
    595         if (cc == '-') {
    596             bufferCharacter(cc);
    597             HTML_ADVANCE_TO(ScriptDataEscapedDashState);
    598         } else if (cc == '<')
    599             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    600         else if (cc == kEndOfFileMarker) {
    601             parseError();
    602             HTML_RECONSUME_IN(DataState);
    603         } else {
    604             bufferCharacter(cc);
    605             HTML_ADVANCE_TO(ScriptDataEscapedState);
    606         }
    607     }
    608     END_STATE()
    609 
    610     HTML_BEGIN_STATE(ScriptDataEscapedDashState) {
    611         if (cc == '-') {
    612             bufferCharacter(cc);
    613             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    614         } else if (cc == '<')
    615             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    616         else if (cc == kEndOfFileMarker) {
    617             parseError();
    618             HTML_RECONSUME_IN(DataState);
    619         } else {
    620             bufferCharacter(cc);
    621             HTML_ADVANCE_TO(ScriptDataEscapedState);
    622         }
    623     }
    624     END_STATE()
    625 
    626     HTML_BEGIN_STATE(ScriptDataEscapedDashDashState) {
    627         if (cc == '-') {
    628             bufferCharacter(cc);
    629             HTML_ADVANCE_TO(ScriptDataEscapedDashDashState);
    630         } else if (cc == '<')
    631             HTML_ADVANCE_TO(ScriptDataEscapedLessThanSignState);
    632         else if (cc == '>') {
    633             bufferCharacter(cc);
    634             HTML_ADVANCE_TO(ScriptDataState);
    635         } else if (cc == kEndOfFileMarker) {
    636             parseError();
    637             HTML_RECONSUME_IN(DataState);
    638         } else {
    639             bufferCharacter(cc);
    640             HTML_ADVANCE_TO(ScriptDataEscapedState);
    641         }
    642     }
    643     END_STATE()
    644 
    645     HTML_BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
    646         if (cc == '/') {
    647             m_temporaryBuffer.clear();
    648             ASSERT(m_bufferedEndTagName.isEmpty());
    649             HTML_ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
    650         } else if (isASCIIUpper(cc)) {
    651             bufferCharacter('<');
    652             bufferCharacter(cc);
    653             m_temporaryBuffer.clear();
    654             m_temporaryBuffer.append(toLowerCase(cc));
    655             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    656         } else if (isASCIILower(cc)) {
    657             bufferCharacter('<');
    658             bufferCharacter(cc);
    659             m_temporaryBuffer.clear();
    660             m_temporaryBuffer.append(static_cast<LChar>(cc));
    661             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    662         } else {
    663             bufferCharacter('<');
    664             HTML_RECONSUME_IN(ScriptDataEscapedState);
    665         }
    666     }
    667     END_STATE()
    668 
    669     HTML_BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
    670         if (isASCIIUpper(cc)) {
    671             m_temporaryBuffer.append(static_cast<LChar>(cc));
    672             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    673             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    674         } else if (isASCIILower(cc)) {
    675             m_temporaryBuffer.append(static_cast<LChar>(cc));
    676             addToPossibleEndTag(static_cast<LChar>(cc));
    677             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    678         } else {
    679             bufferCharacter('<');
    680             bufferCharacter('/');
    681             HTML_RECONSUME_IN(ScriptDataEscapedState);
    682         }
    683     }
    684     END_STATE()
    685 
    686     HTML_BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
    687         if (isASCIIUpper(cc)) {
    688             m_temporaryBuffer.append(static_cast<LChar>(cc));
    689             addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
    690             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    691         } else if (isASCIILower(cc)) {
    692             m_temporaryBuffer.append(static_cast<LChar>(cc));
    693             addToPossibleEndTag(static_cast<LChar>(cc));
    694             HTML_ADVANCE_TO(ScriptDataEscapedEndTagNameState);
    695         } else {
    696             if (isTokenizerWhitespace(cc)) {
    697                 if (isAppropriateEndTag()) {
    698                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    699                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
    700                 }
    701             } else if (cc == '/') {
    702                 if (isAppropriateEndTag()) {
    703                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    704                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
    705                 }
    706             } else if (cc == '>') {
    707                 if (isAppropriateEndTag()) {
    708                     m_temporaryBuffer.append(static_cast<LChar>(cc));
    709                     return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
    710                 }
    711             }
    712             bufferCharacter('<');
    713             bufferCharacter('/');
    714             m_token->appendToCharacter(m_temporaryBuffer);
    715             m_bufferedEndTagName.clear();
    716             m_temporaryBuffer.clear();
    717             HTML_RECONSUME_IN(ScriptDataEscapedState);
    718         }
    719     }
    720     END_STATE()
    721 
    722     HTML_BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
                // Accumulates a candidate tag name. Each ASCII letter is passed to
                // bufferCharacter() unchanged and appended, lower-cased, to
                // m_temporaryBuffer. Whitespace, '/' or '>' ends the name: if
                // temporaryBufferIs(scriptTag.localName()) holds, the tokenizer moves to
                // ScriptDataDoubleEscapedState, otherwise to ScriptDataEscapedState.
                // Any other character is reconsumed in ScriptDataEscapedState without
                // being buffered here.
    723         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
    724             bufferCharacter(cc);
    725             if (temporaryBufferIs(scriptTag.localName()))
    726                 HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    727             else
    728                 HTML_ADVANCE_TO(ScriptDataEscapedState);
    729         } else if (isASCIIUpper(cc)) {
    730             bufferCharacter(cc);
                    // Only the comparison buffer is case-folded; the buffered character keeps its case.
    731             m_temporaryBuffer.append(toLowerCase(cc));
    732             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    733         } else if (isASCIILower(cc)) {
    734             bufferCharacter(cc);
    735             m_temporaryBuffer.append(static_cast<LChar>(cc));
    736             HTML_ADVANCE_TO(ScriptDataDoubleEscapeStartState);
    737         } else
    738             HTML_RECONSUME_IN(ScriptDataEscapedState);
    739     }
    740     END_STATE()
    741 
    742     HTML_BEGIN_STATE(ScriptDataDoubleEscapedState) {
                // Every character except the end-of-file marker is buffered. '-' moves to
                // ScriptDataDoubleEscapedDashState, '<' to
                // ScriptDataDoubleEscapedLessThanSignState, and anything else stays in
                // this state. The end-of-file marker reports a parse error and is
                // reconsumed in DataState.
    743         if (cc == '-') {
    744             bufferCharacter(cc);
    745             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashState);
    746         } else if (cc == '<') {
    747             bufferCharacter(cc);
    748             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    749         } else if (cc == kEndOfFileMarker) {
    750             parseError();
    751             HTML_RECONSUME_IN(DataState);
    752         } else {
    753             bufferCharacter(cc);
    754             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    755         }
    756     }
    757     END_STATE()
    758 
    759     HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
                // Entered after a single '-' in ScriptDataDoubleEscapedState. A second
                // '-' is buffered and moves to ScriptDataDoubleEscapedDashDashState; '<'
                // is buffered and moves to ScriptDataDoubleEscapedLessThanSignState; any
                // other character is buffered and returns to
                // ScriptDataDoubleEscapedState. The end-of-file marker reports a parse
                // error and is reconsumed in DataState.
    760         if (cc == '-') {
    761             bufferCharacter(cc);
    762             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
    763         } else if (cc == '<') {
    764             bufferCharacter(cc);
    765             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    766         } else if (cc == kEndOfFileMarker) {
    767             parseError();
    768             HTML_RECONSUME_IN(DataState);
    769         } else {
    770             bufferCharacter(cc);
    771             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    772         }
    773     }
    774     END_STATE()
    775 
    776     HTML_BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
                // Entered after two consecutive '-' characters. Further '-' characters
                // are buffered and keep the tokenizer here; '<' is buffered and moves to
                // ScriptDataDoubleEscapedLessThanSignState; '>' is buffered and moves to
                // ScriptDataState; any other character is buffered and returns to
                // ScriptDataDoubleEscapedState. The end-of-file marker reports a parse
                // error and is reconsumed in DataState.
    777         if (cc == '-') {
    778             bufferCharacter(cc);
    779             HTML_ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
    780         } else if (cc == '<') {
    781             bufferCharacter(cc);
    782             HTML_ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
    783         } else if (cc == '>') {
    784             bufferCharacter(cc);
    785             HTML_ADVANCE_TO(ScriptDataState);
    786         } else if (cc == kEndOfFileMarker) {
    787             parseError();
    788             HTML_RECONSUME_IN(DataState);
    789         } else {
    790             bufferCharacter(cc);
    791             HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    792         }
    793     }
    794     END_STATE()
    795 
    796     HTML_BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
                // Entered after a '<' that was already buffered by the previous state.
                // A following '/' is buffered, m_temporaryBuffer is emptied so a tag name
                // can be collected, and the tokenizer moves to
                // ScriptDataDoubleEscapeEndState. Anything else is reconsumed in
                // ScriptDataDoubleEscapedState.
    797         if (cc == '/') {
    798             bufferCharacter(cc);
    799             m_temporaryBuffer.clear();
    800             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    801         } else
    802             HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
    803     }
    804     END_STATE()
    805 
    806     HTML_BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
                // Counterpart of ScriptDataDoubleEscapeStartState with the two targets
                // swapped. ASCII letters are buffered unchanged and appended, lower-cased,
                // to m_temporaryBuffer. Whitespace, '/' or '>' ends the name: if
                // temporaryBufferIs(scriptTag.localName()) holds, the tokenizer moves to
                // ScriptDataEscapedState, otherwise it stays double-escaped. Any other
                // character is reconsumed in ScriptDataDoubleEscapedState.
    807         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
    808             bufferCharacter(cc);
    809             if (temporaryBufferIs(scriptTag.localName()))
    810                 HTML_ADVANCE_TO(ScriptDataEscapedState);
    811             else
    812                 HTML_ADVANCE_TO(ScriptDataDoubleEscapedState);
    813         } else if (isASCIIUpper(cc)) {
    814             bufferCharacter(cc);
    815             m_temporaryBuffer.append(toLowerCase(cc));
    816             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    817         } else if (isASCIILower(cc)) {
    818             bufferCharacter(cc);
    819             m_temporaryBuffer.append(static_cast<LChar>(cc));
    820             HTML_ADVANCE_TO(ScriptDataDoubleEscapeEndState);
    821         } else
    822             HTML_RECONSUME_IN(ScriptDataDoubleEscapedState);
    823     }
    824     END_STATE()
    825 
    826     HTML_BEGIN_STATE(BeforeAttributeNameState) {
                // Skips whitespace ahead of an attribute. '/' moves to
                // SelfClosingStartTagState and '>' emits the current token and resumes in
                // DataState. The end-of-file marker reports a parse error and is
                // reconsumed in DataState. Every other character starts a new attribute
                // on m_token and becomes the first character of its name (ASCII
                // upper-case letters are lower-cased first).
    827         if (isTokenizerWhitespace(cc))
    828             HTML_ADVANCE_TO(BeforeAttributeNameState);
    829         else if (cc == '/')
    830             HTML_ADVANCE_TO(SelfClosingStartTagState);
    831         else if (cc == '>')
    832             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    833         else if (isASCIIUpper(cc)) {
    834             m_token->addNewAttribute();
    835             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    836             m_token->appendToAttributeName(toLowerCase(cc));
    837             HTML_ADVANCE_TO(AttributeNameState);
    838         } else if (cc == kEndOfFileMarker) {
    839             parseError();
    840             HTML_RECONSUME_IN(DataState);
    841         } else {
                    // These characters are reported as parse errors but still start the name.
    842             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
    843                 parseError();
    844             m_token->addNewAttribute();
    845             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    846             m_token->appendToAttributeName(cc);
    847             HTML_ADVANCE_TO(AttributeNameState);
    848         }
    849     }
    850     END_STATE()
    851 
    852     HTML_BEGIN_STATE(AttributeNameState) {
                // Extends the current attribute's name one character at a time (ASCII
                // upper-case letters are lower-cased). Whitespace, '/', '=' and '>' end
                // the name at the current consumed-character count and move to
                // AfterAttributeNameState, SelfClosingStartTagState,
                // BeforeAttributeValueState, or emit the token and resume in DataState,
                // respectively. The end-of-file marker reports a parse error, ends the
                // name and is reconsumed in DataState.
    853         if (isTokenizerWhitespace(cc)) {
    854             m_token->endAttributeName(source.numberOfCharactersConsumed());
    855             HTML_ADVANCE_TO(AfterAttributeNameState);
    856         } else if (cc == '/') {
    857             m_token->endAttributeName(source.numberOfCharactersConsumed());
    858             HTML_ADVANCE_TO(SelfClosingStartTagState);
    859         } else if (cc == '=') {
    860             m_token->endAttributeName(source.numberOfCharactersConsumed());
    861             HTML_ADVANCE_TO(BeforeAttributeValueState);
    862         } else if (cc == '>') {
    863             m_token->endAttributeName(source.numberOfCharactersConsumed());
    864             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    865         } else if (isASCIIUpper(cc)) {
    866             m_token->appendToAttributeName(toLowerCase(cc));
    867             HTML_ADVANCE_TO(AttributeNameState);
    868         } else if (cc == kEndOfFileMarker) {
    869             parseError();
    870             m_token->endAttributeName(source.numberOfCharactersConsumed());
    871             HTML_RECONSUME_IN(DataState);
    872         } else {
                    // '=' is not tested here: it is already handled by the branch above,
                    // so it can never reach this point.
    873             if (cc == '"' || cc == '\'' || cc == '<')
    874                 parseError();
    875             m_token->appendToAttributeName(cc);
    876             HTML_ADVANCE_TO(AttributeNameState);
    877         }
    878     }
    879     END_STATE()
    880 
    881     HTML_BEGIN_STATE(AfterAttributeNameState) {
                // Reached after whitespace following an attribute name. More whitespace
                // is skipped; '/' moves to SelfClosingStartTagState; '=' moves to
                // BeforeAttributeValueState; '>' emits the token and resumes in
                // DataState. The end-of-file marker reports a parse error and is
                // reconsumed in DataState. Any other character starts a new attribute on
                // m_token (ASCII upper-case letters are lower-cased first).
    882         if (isTokenizerWhitespace(cc))
    883             HTML_ADVANCE_TO(AfterAttributeNameState);
    884         else if (cc == '/')
    885             HTML_ADVANCE_TO(SelfClosingStartTagState);
    886         else if (cc == '=')
    887             HTML_ADVANCE_TO(BeforeAttributeValueState);
    888         else if (cc == '>')
    889             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    890         else if (isASCIIUpper(cc)) {
    891             m_token->addNewAttribute();
    892             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    893             m_token->appendToAttributeName(toLowerCase(cc));
    894             HTML_ADVANCE_TO(AttributeNameState);
    895         } else if (cc == kEndOfFileMarker) {
    896             parseError();
    897             HTML_RECONSUME_IN(DataState);
    898         } else {
                    // These characters are reported as parse errors but still start the name.
    899             if (cc == '"' || cc == '\'' || cc == '<')
    900                 parseError();
    901             m_token->addNewAttribute();
    902             m_token->beginAttributeName(source.numberOfCharactersConsumed());
    903             m_token->appendToAttributeName(cc);
    904             HTML_ADVANCE_TO(AttributeNameState);
    905         }
    906     }
    907     END_STATE()
    908 
    909     HTML_BEGIN_STATE(BeforeAttributeValueState) {
                // Skips whitespace after '=' and selects the attribute-value state from
                // the first significant character: '"' or '\'' select the quoted states,
                // '&' is reconsumed in AttributeValueUnquotedState, and any other
                // ordinary character becomes the first character of an unquoted value.
                // '>' reports a parse error, emits the token and resumes in DataState;
                // the end-of-file marker reports a parse error and is reconsumed in
                // DataState.
    910         if (isTokenizerWhitespace(cc))
    911             HTML_ADVANCE_TO(BeforeAttributeValueState);
    912         else if (cc == '"') {
                    // NOTE(review): the "+ 1" presumably makes the recorded start skip the
                    // opening quote itself - confirm against beginAttributeValue().
    913             m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    914             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
    915         } else if (cc == '&') {
    916             m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    917             HTML_RECONSUME_IN(AttributeValueUnquotedState);
    918         } else if (cc == '\'') {
    919             m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    920             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
    921         } else if (cc == '>') {
    922             parseError();
    923             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    924         } else if (cc == kEndOfFileMarker) {
    925             parseError();
    926             HTML_RECONSUME_IN(DataState);
    927         } else {
                    // These characters are reported as parse errors but are still kept as value text.
    928             if (cc == '<' || cc == '=' || cc == '`')
    929                 parseError();
    930             m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    931             m_token->appendToAttributeValue(cc);
    932             HTML_ADVANCE_TO(AttributeValueUnquotedState);
    933         }
    934     }
    935     END_STATE()
    936 
    937     HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
                // Collects a double-quoted attribute value. '"' ends the value and moves
                // to AfterAttributeValueQuotedState. '&' moves to
                // CharacterReferenceInAttributeValueState. The end-of-file marker reports
                // a parse error, ends the value and is reconsumed in DataState. Any other
                // character is appended to the value.
    938         if (cc == '"') {
    939             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    940             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
    941         } else if (cc == '&') {
                    // CharacterReferenceInAttributeValueState reads this value to return to this state.
    942             m_additionalAllowedCharacter = '"';
    943             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    944         } else if (cc == kEndOfFileMarker) {
    945             parseError();
    946             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    947             HTML_RECONSUME_IN(DataState);
    948         } else {
    949             m_token->appendToAttributeValue(cc);
    950             HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
    951         }
    952     }
    953     END_STATE()
    954 
    955     HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
                // Collects a single-quoted attribute value. '\'' ends the value and moves
                // to AfterAttributeValueQuotedState. '&' moves to
                // CharacterReferenceInAttributeValueState. The end-of-file marker reports
                // a parse error, ends the value and is reconsumed in DataState. Any other
                // character is appended to the value.
    956         if (cc == '\'') {
    957             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    958             HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
    959         } else if (cc == '&') {
                    // CharacterReferenceInAttributeValueState reads this value to return to this state.
    960             m_additionalAllowedCharacter = '\'';
    961             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    962         } else if (cc == kEndOfFileMarker) {
    963             parseError();
    964             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    965             HTML_RECONSUME_IN(DataState);
    966         } else {
    967             m_token->appendToAttributeValue(cc);
    968             HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
    969         }
    970     }
    971     END_STATE()
    972 
    973     HTML_BEGIN_STATE(AttributeValueUnquotedState) {
                // Collects an unquoted attribute value. Whitespace ends the value and
                // moves to BeforeAttributeNameState; '>' ends the value, emits the token
                // and resumes in DataState; '&' moves to
                // CharacterReferenceInAttributeValueState. The end-of-file marker reports
                // a parse error, ends the value and is reconsumed in DataState. Any other
                // character is appended to the value.
    974         if (isTokenizerWhitespace(cc)) {
    975             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    976             HTML_ADVANCE_TO(BeforeAttributeNameState);
    977         } else if (cc == '&') {
                    // CharacterReferenceInAttributeValueState reads this value to return to this state.
    978             m_additionalAllowedCharacter = '>';
    979             HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
    980         } else if (cc == '>') {
    981             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    982             return emitAndResumeIn(source, HTMLTokenizer::DataState);
    983         } else if (cc == kEndOfFileMarker) {
    984             parseError();
    985             m_token->endAttributeValue(source.numberOfCharactersConsumed());
    986             HTML_RECONSUME_IN(DataState);
    987         } else {
                    // These characters are reported as parse errors but are still kept as value text.
    988             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
    989                 parseError();
    990             m_token->appendToAttributeValue(cc);
    991             HTML_ADVANCE_TO(AttributeValueUnquotedState);
    992         }
    993     }
    994     END_STATE()
    995 
    996     HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
                // Entered after '&' inside an attribute value. Calls consumeHTMLEntity()
                // with m_additionalAllowedCharacter; if it reports notEnoughCharacters,
                // the function returns haveBufferedCharacterToken() without changing the
                // value. On failure a literal '&' is appended to the value; on success
                // each decoded unit is appended. The tokenizer then switches back to the
                // attribute-value state it came from.
    997         bool notEnoughCharacters = false;
    998         DecodedHTMLEntity decodedEntity;
    999         bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
   1000         if (notEnoughCharacters)
   1001             return haveBufferedCharacterToken();
   1002         if (!success) {
   1003             ASSERT(decodedEntity.isEmpty());
   1004             m_token->appendToAttributeValue('&');
   1005         } else {
   1006             for (unsigned i = 0; i < decodedEntity.length; ++i)
   1007                 m_token->appendToAttributeValue(decodedEntity.data[i]);
   1008         }
   1009         // We're supposed to switch back to the attribute value state that
   1010         // we were in when we were switched into this state. Rather than
   1011         // keeping track of this explicitly, we observe that the previous
   1012         // state can be determined by m_additionalAllowedCharacter.
   1013         if (m_additionalAllowedCharacter == '"')
   1014             HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
   1015         else if (m_additionalAllowedCharacter == '\'')
   1016             HTML_SWITCH_TO(AttributeValueSingleQuotedState);
   1017         else if (m_additionalAllowedCharacter == '>')
   1018             HTML_SWITCH_TO(AttributeValueUnquotedState);
   1019         else
   1020             ASSERT_NOT_REACHED();
   1021     }
   1022     END_STATE()
   1023 
   1024     HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
                // Reached after the closing quote of an attribute value. Whitespace moves
                // to BeforeAttributeNameState, '/' to SelfClosingStartTagState, and '>'
                // emits the token and resumes in DataState. The end-of-file marker
                // reports a parse error and is reconsumed in DataState. Any other
                // character reports a parse error and is reconsumed in
                // BeforeAttributeNameState.
   1025         if (isTokenizerWhitespace(cc))
   1026             HTML_ADVANCE_TO(BeforeAttributeNameState);
   1027         else if (cc == '/')
   1028             HTML_ADVANCE_TO(SelfClosingStartTagState);
   1029         else if (cc == '>')
   1030             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1031         else if (cc == kEndOfFileMarker) {
   1032             parseError();
   1033             HTML_RECONSUME_IN(DataState);
   1034         } else {
   1035             parseError();
   1036             HTML_RECONSUME_IN(BeforeAttributeNameState);
   1037         }
   1038     }
   1039     END_STATE()
   1040 
   1041     HTML_BEGIN_STATE(SelfClosingStartTagState) {
                // Reached after '/' inside a tag. '>' marks the token self-closing, emits
                // it and resumes in DataState. The end-of-file marker reports a parse
                // error and is reconsumed in DataState. Any other character reports a
                // parse error and is reconsumed in BeforeAttributeNameState, so the '/'
                // has no effect on the token.
   1042         if (cc == '>') {
   1043             m_token->setSelfClosing();
   1044             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1045         } else if (cc == kEndOfFileMarker) {
   1046             parseError();
   1047             HTML_RECONSUME_IN(DataState);
   1048         } else {
   1049             parseError();
   1050             HTML_RECONSUME_IN(BeforeAttributeNameState);
   1051         }
   1052     }
   1053     END_STATE()
   1054 
   1055     HTML_BEGIN_STATE(BogusCommentState) {
                // Starts a comment on m_token and hands the current character, unconsumed,
                // to ContinueBogusCommentState, which collects the comment text.
   1056         m_token->beginComment();
   1057         HTML_RECONSUME_IN(ContinueBogusCommentState);
   1058     }
   1059     END_STATE()
   1060 
   1061     HTML_BEGIN_STATE(ContinueBogusCommentState) {
                // Appends every character to the comment until '>' (emit and resume in
                // DataState) or the end-of-file marker (emit and reconsume in DataState).
                // No parse error is reported in this state.
   1062         if (cc == '>')
   1063             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1064         else if (cc == kEndOfFileMarker)
   1065             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1066         else {
   1067             m_token->appendToComment(cc);
   1068             HTML_ADVANCE_TO(ContinueBogusCommentState);
   1069         }
   1070     }
   1071     END_STATE()
   1072 
   1073     HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
                // Uses look-ahead on the input to recognise one of three openers:
                //   "--"       -> begins a comment and switches to CommentStartState
                //   "doctype"  -> (case-insensitive) switches to DOCTYPEState
                //   "[CDATA["  -> only when shouldAllowCDATA(); switches to CDATASectionState
                // If the look-ahead reports NotEnoughCharacters, the function returns
                // haveBufferedCharacterToken() and leaves the input untouched. When
                // nothing matches, control reaches the bottom of the block: a parse
                // error is reported and the character is reconsumed in
                // BogusCommentState.
                // NOTE(review): this relies on HTML_SWITCH_TO not falling through; the
                // macro is defined outside this view.
   1074         DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
   1075         DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
   1076         DEFINE_STATIC_LOCAL(String, cdataString, ("[CDATA["));
   1077         if (cc == '-') {
   1078             SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
   1079             if (result == SegmentedString::DidMatch) {
   1080                 source.advanceAndASSERT('-');
   1081                 source.advanceAndASSERT('-');
   1082                 m_token->beginComment();
   1083                 HTML_SWITCH_TO(CommentStartState);
   1084             } else if (result == SegmentedString::NotEnoughCharacters)
   1085                 return haveBufferedCharacterToken();
   1086         } else if (cc == 'D' || cc == 'd') {
   1087             SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
   1088             if (result == SegmentedString::DidMatch) {
   1089                 advanceStringAndASSERTIgnoringCase(source, "doctype");
   1090                 HTML_SWITCH_TO(DOCTYPEState);
   1091             } else if (result == SegmentedString::NotEnoughCharacters)
   1092                 return haveBufferedCharacterToken();
   1093         } else if (cc == '[' && shouldAllowCDATA()) {
   1094             SegmentedString::LookAheadResult result = source.lookAhead(cdataString);
   1095             if (result == SegmentedString::DidMatch) {
   1096                 advanceStringAndASSERT(source, "[CDATA[");
   1097                 HTML_SWITCH_TO(CDATASectionState);
   1098             } else if (result == SegmentedString::NotEnoughCharacters)
   1099                 return haveBufferedCharacterToken();
   1100         }
   1101         parseError();
   1102         HTML_RECONSUME_IN(BogusCommentState);
   1103     }
   1104     END_STATE()
   1105 
   1106     HTML_BEGIN_STATE(CommentStartState) {
                // First character after the comment opener. '-' moves to
                // CommentStartDashState without appending anything. '>' reports a parse
                // error, emits the comment as collected so far and resumes in DataState.
                // The end-of-file marker reports a parse error, emits and reconsumes in
                // DataState. Any other character is appended to the comment and the
                // tokenizer moves to CommentState.
   1107         if (cc == '-')
   1108             HTML_ADVANCE_TO(CommentStartDashState);
   1109         else if (cc == '>') {
   1110             parseError();
   1111             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1112         } else if (cc == kEndOfFileMarker) {
   1113             parseError();
   1114             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1115         } else {
   1116             m_token->appendToComment(cc);
   1117             HTML_ADVANCE_TO(CommentState);
   1118         }
   1119     }
   1120     END_STATE()
   1121 
   1122     HTML_BEGIN_STATE(CommentStartDashState) {
                // Reached after a '-' seen in CommentStartState that has not yet been
                // appended to the comment. A second '-' moves to CommentEndState. '>'
                // reports a parse error, emits and resumes in DataState. The end-of-file
                // marker reports a parse error, emits and reconsumes in DataState. Any
                // other character appends the pending '-' followed by that character and
                // moves to CommentState.
   1123         if (cc == '-')
   1124             HTML_ADVANCE_TO(CommentEndState);
   1125         else if (cc == '>') {
   1126             parseError();
   1127             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1128         } else if (cc == kEndOfFileMarker) {
   1129             parseError();
   1130             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1131         } else {
   1132             m_token->appendToComment('-');
   1133             m_token->appendToComment(cc);
   1134             HTML_ADVANCE_TO(CommentState);
   1135         }
   1136     }
   1137     END_STATE()
   1138 
   1139     HTML_BEGIN_STATE(CommentState) {
                // Collects comment text. '-' moves to CommentEndDashState without being
                // appended yet. The end-of-file marker reports a parse error, emits the
                // comment and reconsumes in DataState. Any other character is appended.
   1140         if (cc == '-')
   1141             HTML_ADVANCE_TO(CommentEndDashState);
   1142         else if (cc == kEndOfFileMarker) {
   1143             parseError();
   1144             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1145         } else {
   1146             m_token->appendToComment(cc);
   1147             HTML_ADVANCE_TO(CommentState);
   1148         }
   1149     }
   1150     END_STATE()
   1151 
   1152     HTML_BEGIN_STATE(CommentEndDashState) {
                // Reached with one '-' pending that has not been appended. A second '-'
                // moves to CommentEndState. The end-of-file marker reports a parse error,
                // emits the comment and reconsumes in DataState. Any other character
                // appends the pending '-' followed by that character and returns to
                // CommentState.
   1153         if (cc == '-')
   1154             HTML_ADVANCE_TO(CommentEndState);
   1155         else if (cc == kEndOfFileMarker) {
   1156             parseError();
   1157             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1158         } else {
   1159             m_token->appendToComment('-');
   1160             m_token->appendToComment(cc);
   1161             HTML_ADVANCE_TO(CommentState);
   1162         }
   1163     }
   1164     END_STATE()
   1165 
   1166     HTML_BEGIN_STATE(CommentEndState) {
                // Reached with "--" pending that has not been appended. '>' emits the
                // comment and resumes in DataState with no parse error. Every other
                // input reports a parse error: '!' moves to CommentEndBangState; a
                // further '-' appends one '-' and stays here; the end-of-file marker
                // emits and reconsumes in DataState; any other character appends "--"
                // followed by that character and returns to CommentState.
   1167         if (cc == '>')
   1168             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1169         else if (cc == '!') {
   1170             parseError();
   1171             HTML_ADVANCE_TO(CommentEndBangState);
   1172         } else if (cc == '-') {
   1173             parseError();
   1174             m_token->appendToComment('-');
   1175             HTML_ADVANCE_TO(CommentEndState);
   1176         } else if (cc == kEndOfFileMarker) {
   1177             parseError();
   1178             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1179         } else {
   1180             parseError();
   1181             m_token->appendToComment('-');
   1182             m_token->appendToComment('-');
   1183             m_token->appendToComment(cc);
   1184             HTML_ADVANCE_TO(CommentState);
   1185         }
   1186     }
   1187     END_STATE()
   1188 
   1189     HTML_BEGIN_STATE(CommentEndBangState) {
                // Reached with "--!" pending that has not been appended. '-' appends
                // "--!" and moves to CommentEndDashState, leaving the new '-' pending.
                // '>' emits the comment and resumes in DataState. The end-of-file marker
                // reports a parse error, emits and reconsumes in DataState. Any other
                // character appends "--!" followed by that character and returns to
                // CommentState.
   1190         if (cc == '-') {
   1191             m_token->appendToComment('-');
   1192             m_token->appendToComment('-');
   1193             m_token->appendToComment('!');
   1194             HTML_ADVANCE_TO(CommentEndDashState);
   1195         } else if (cc == '>')
   1196             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1197         else if (cc == kEndOfFileMarker) {
   1198             parseError();
   1199             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1200         } else {
   1201             m_token->appendToComment('-');
   1202             m_token->appendToComment('-');
   1203             m_token->appendToComment('!');
   1204             m_token->appendToComment(cc);
   1205             HTML_ADVANCE_TO(CommentState);
   1206         }
   1207     }
   1208     END_STATE()
   1209 
   1210     HTML_BEGIN_STATE(DOCTYPEState) {
                // First character after the "doctype" keyword. Whitespace moves to
                // BeforeDOCTYPENameState. The end-of-file marker reports a parse error,
                // begins a DOCTYPE with no name, sets force-quirks, then emits and
                // reconsumes in DataState. Any other character reports a parse error and
                // is reconsumed in BeforeDOCTYPENameState.
   1211         if (isTokenizerWhitespace(cc))
   1212             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
   1213         else if (cc == kEndOfFileMarker) {
   1214             parseError();
   1215             m_token->beginDOCTYPE();
   1216             m_token->setForceQuirks();
   1217             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1218         } else {
   1219             parseError();
   1220             HTML_RECONSUME_IN(BeforeDOCTYPENameState);
   1221         }
   1222     }
   1223     END_STATE()
   1224 
   1225     HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
                // Skips whitespace ahead of the DOCTYPE name. The first name character
                // begins the DOCTYPE token (ASCII upper-case letters are lower-cased) and
                // moves to DOCTYPENameState. '>' and the end-of-file marker both report a
                // parse error, begin a DOCTYPE with no name and set force-quirks; '>'
                // then emits and resumes in DataState, while end-of-file emits and
                // reconsumes in DataState.
   1226         if (isTokenizerWhitespace(cc))
   1227             HTML_ADVANCE_TO(BeforeDOCTYPENameState);
   1228         else if (isASCIIUpper(cc)) {
   1229             m_token->beginDOCTYPE(toLowerCase(cc));
   1230             HTML_ADVANCE_TO(DOCTYPENameState);
   1231         } else if (cc == '>') {
   1232             parseError();
   1233             m_token->beginDOCTYPE();
   1234             m_token->setForceQuirks();
   1235             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1236         } else if (cc == kEndOfFileMarker) {
   1237             parseError();
   1238             m_token->beginDOCTYPE();
   1239             m_token->setForceQuirks();
   1240             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1241         } else {
   1242             m_token->beginDOCTYPE(cc);
   1243             HTML_ADVANCE_TO(DOCTYPENameState);
   1244         }
   1245     }
   1246     END_STATE()
   1247 
   1248     HTML_BEGIN_STATE(DOCTYPENameState) {
                // Extends the DOCTYPE name (ASCII upper-case letters are lower-cased).
                // Whitespace moves to AfterDOCTYPENameState; '>' emits the token and
                // resumes in DataState. The end-of-file marker reports a parse error,
                // sets force-quirks, then emits and reconsumes in DataState.
   1249         if (isTokenizerWhitespace(cc))
   1250             HTML_ADVANCE_TO(AfterDOCTYPENameState);
   1251         else if (cc == '>')
   1252             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1253         else if (isASCIIUpper(cc)) {
   1254             m_token->appendToName(toLowerCase(cc));
   1255             HTML_ADVANCE_TO(DOCTYPENameState);
   1256         } else if (cc == kEndOfFileMarker) {
   1257             parseError();
   1258             m_token->setForceQuirks();
   1259             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1260         } else {
   1261             m_token->appendToName(cc);
   1262             HTML_ADVANCE_TO(DOCTYPENameState);
   1263         }
   1264     }
   1265     END_STATE()
   1266 
   1267     HTML_BEGIN_STATE(AfterDOCTYPENameState) {
                // Reached after whitespace following the DOCTYPE name. More whitespace is
                // skipped; '>' emits the token and resumes in DataState. The end-of-file
                // marker reports a parse error, sets force-quirks, then emits and
                // reconsumes in DataState. Otherwise the input is checked with
                // case-insensitive look-ahead for the keywords "public" and "system",
                // which switch to AfterDOCTYPEPublicKeywordState and
                // AfterDOCTYPESystemKeywordState. If the look-ahead reports
                // NotEnoughCharacters, the function returns
                // haveBufferedCharacterToken(). When neither keyword matches, a parse
                // error is reported, force-quirks is set and the tokenizer advances to
                // BogusDOCTYPEState.
   1268         if (isTokenizerWhitespace(cc))
   1269             HTML_ADVANCE_TO(AfterDOCTYPENameState);
                // Chained with "else" like every sibling state, so the whitespace case
                // cannot reach the branches below regardless of how HTML_ADVANCE_TO
                // transfers control.
   1270         else if (cc == '>')
   1271             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1272         else if (cc == kEndOfFileMarker) {
   1273             parseError();
   1274             m_token->setForceQuirks();
   1275             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1276         } else {
   1277             DEFINE_STATIC_LOCAL(String, publicString, ("public"));
   1278             DEFINE_STATIC_LOCAL(String, systemString, ("system"));
   1279             if (cc == 'P' || cc == 'p') {
   1280                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
   1281                 if (result == SegmentedString::DidMatch) {
   1282                     advanceStringAndASSERTIgnoringCase(source, "public");
   1283                     HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
   1284                 } else if (result == SegmentedString::NotEnoughCharacters)
   1285                     return haveBufferedCharacterToken();
   1286             } else if (cc == 'S' || cc == 's') {
   1287                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
   1288                 if (result == SegmentedString::DidMatch) {
   1289                     advanceStringAndASSERTIgnoringCase(source, "system");
   1290                     HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
   1291                 } else if (result == SegmentedString::NotEnoughCharacters)
   1292                     return haveBufferedCharacterToken();
   1293             }
                    // Reached only when neither keyword matched.
                    // NOTE(review): relies on HTML_SWITCH_TO not falling through; the
                    // macro is defined outside this view.
   1294             parseError();
   1295             m_token->setForceQuirks();
   1296             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1297         }
   1298     }
   1299     END_STATE()
   1300 
   1301     HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
                // First character after the "public" keyword. Whitespace moves to
                // BeforeDOCTYPEPublicIdentifierState. A quote directly after the keyword
                // reports a parse error, sets the public identifier to the empty string
                // and moves to the matching quoted-identifier state. '>', the
                // end-of-file marker and any other character all report a parse error
                // and set force-quirks; '>' then emits and resumes in DataState,
                // end-of-file emits and reconsumes in DataState, and anything else
                // advances to BogusDOCTYPEState.
   1302         if (isTokenizerWhitespace(cc))
   1303             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
   1304         else if (cc == '"') {
   1305             parseError();
   1306             m_token->setPublicIdentifierToEmptyString();
   1307             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1308         } else if (cc == '\'') {
   1309             parseError();
   1310             m_token->setPublicIdentifierToEmptyString();
   1311             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1312         } else if (cc == '>') {
   1313             parseError();
   1314             m_token->setForceQuirks();
   1315             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1316         } else if (cc == kEndOfFileMarker) {
   1317             parseError();
   1318             m_token->setForceQuirks();
   1319             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1320         } else {
   1321             parseError();
   1322             m_token->setForceQuirks();
   1323             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1324         }
   1325     }
   1326     END_STATE()
   1327 
   1328     HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
                // Skips whitespace between the "public" keyword and its identifier. A
                // quote sets the public identifier to the empty string and moves to the
                // matching quoted-identifier state, with no parse error. '>', the
                // end-of-file marker and any other character all report a parse error
                // and set force-quirks; '>' then emits and resumes in DataState,
                // end-of-file emits and reconsumes in DataState, and anything else
                // advances to BogusDOCTYPEState.
   1329         if (isTokenizerWhitespace(cc))
   1330             HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
   1331         else if (cc == '"') {
   1332             m_token->setPublicIdentifierToEmptyString();
   1333             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1334         } else if (cc == '\'') {
   1335             m_token->setPublicIdentifierToEmptyString();
   1336             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1337         } else if (cc == '>') {
   1338             parseError();
   1339             m_token->setForceQuirks();
   1340             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1341         } else if (cc == kEndOfFileMarker) {
   1342             parseError();
   1343             m_token->setForceQuirks();
   1344             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1345         } else {
   1346             parseError();
   1347             m_token->setForceQuirks();
   1348             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1349         }
   1350     }
   1351     END_STATE()
   1352 
   1353     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
                // Collects a double-quoted public identifier. '"' closes it and moves to
                // AfterDOCTYPEPublicIdentifierState. '>' and the end-of-file marker
                // report a parse error and set force-quirks; '>' emits and resumes in
                // DataState, end-of-file emits and reconsumes in DataState. Any other
                // character is appended to the public identifier.
   1354         if (cc == '"')
   1355             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
   1356         else if (cc == '>') {
   1357             parseError();
   1358             m_token->setForceQuirks();
   1359             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1360         } else if (cc == kEndOfFileMarker) {
   1361             parseError();
   1362             m_token->setForceQuirks();
   1363             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1364         } else {
   1365             m_token->appendToPublicIdentifier(cc);
   1366             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
   1367         }
   1368     }
   1369     END_STATE()
   1370 
   1371     HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
                // Collects a single-quoted public identifier. '\'' closes it and moves to
                // AfterDOCTYPEPublicIdentifierState. '>' and the end-of-file marker
                // report a parse error and set force-quirks; '>' emits and resumes in
                // DataState, end-of-file emits and reconsumes in DataState. Any other
                // character is appended to the public identifier.
   1372         if (cc == '\'')
   1373             HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
   1374         else if (cc == '>') {
   1375             parseError();
   1376             m_token->setForceQuirks();
   1377             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1378         } else if (cc == kEndOfFileMarker) {
   1379             parseError();
   1380             m_token->setForceQuirks();
   1381             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1382         } else {
   1383             m_token->appendToPublicIdentifier(cc);
   1384             HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
   1385         }
   1386     }
   1387     END_STATE()
   1388 
   1389     HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
                // First character after the closing quote of the public identifier.
                // Whitespace moves to BetweenDOCTYPEPublicAndSystemIdentifiersState; '>'
                // emits the token and resumes in DataState. A quote directly after the
                // public identifier reports a parse error, sets the system identifier to
                // the empty string and moves to the matching quoted-identifier state.
                // The end-of-file marker and any other character report a parse error
                // and set force-quirks; end-of-file emits and reconsumes in DataState,
                // anything else advances to BogusDOCTYPEState.
   1390         if (isTokenizerWhitespace(cc))
   1391             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
   1392         else if (cc == '>')
   1393             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1394         else if (cc == '"') {
   1395             parseError();
   1396             m_token->setSystemIdentifierToEmptyString();
   1397             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1398         } else if (cc == '\'') {
   1399             parseError();
   1400             m_token->setSystemIdentifierToEmptyString();
   1401             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1402         } else if (cc == kEndOfFileMarker) {
   1403             parseError();
   1404             m_token->setForceQuirks();
   1405             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1406         } else {
   1407             parseError();
   1408             m_token->setForceQuirks();
   1409             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1410         }
   1411     }
   1412     END_STATE()
   1413 
   1414     HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
   1415         if (isTokenizerWhitespace(cc))
   1416             HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
   1417         else if (cc == '>')
   1418             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1419         else if (cc == '"') {
   1420             m_token->setSystemIdentifierToEmptyString();
   1421             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1422         } else if (cc == '\'') {
   1423             m_token->setSystemIdentifierToEmptyString();
   1424             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1425         } else if (cc == kEndOfFileMarker) {
   1426             parseError();
   1427             m_token->setForceQuirks();
   1428             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1429         } else {
   1430             parseError();
   1431             m_token->setForceQuirks();
   1432             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1433         }
   1434     }
   1435     END_STATE()
   1436 
   1437     HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
   1438         if (isTokenizerWhitespace(cc))
   1439             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
   1440         else if (cc == '"') {
   1441             parseError();
   1442             m_token->setSystemIdentifierToEmptyString();
   1443             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1444         } else if (cc == '\'') {
   1445             parseError();
   1446             m_token->setSystemIdentifierToEmptyString();
   1447             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1448         } else if (cc == '>') {
   1449             parseError();
   1450             m_token->setForceQuirks();
   1451             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1452         } else if (cc == kEndOfFileMarker) {
   1453             parseError();
   1454             m_token->setForceQuirks();
   1455             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1456         } else {
   1457             parseError();
   1458             m_token->setForceQuirks();
   1459             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1460         }
   1461     }
   1462     END_STATE()
   1463 
   1464     HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
   1465         if (isTokenizerWhitespace(cc))
   1466             HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
   1467         if (cc == '"') {
   1468             m_token->setSystemIdentifierToEmptyString();
   1469             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1470         } else if (cc == '\'') {
   1471             m_token->setSystemIdentifierToEmptyString();
   1472             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1473         } else if (cc == '>') {
   1474             parseError();
   1475             m_token->setForceQuirks();
   1476             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1477         } else if (cc == kEndOfFileMarker) {
   1478             parseError();
   1479             m_token->setForceQuirks();
   1480             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1481         } else {
   1482             parseError();
   1483             m_token->setForceQuirks();
   1484             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1485         }
   1486     }
   1487     END_STATE()
   1488 
   1489     HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
   1490         if (cc == '"')
   1491             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1492         else if (cc == '>') {
   1493             parseError();
   1494             m_token->setForceQuirks();
   1495             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1496         } else if (cc == kEndOfFileMarker) {
   1497             parseError();
   1498             m_token->setForceQuirks();
   1499             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1500         } else {
   1501             m_token->appendToSystemIdentifier(cc);
   1502             HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
   1503         }
   1504     }
   1505     END_STATE()
   1506 
   1507     HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
   1508         if (cc == '\'')
   1509             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1510         else if (cc == '>') {
   1511             parseError();
   1512             m_token->setForceQuirks();
   1513             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1514         } else if (cc == kEndOfFileMarker) {
   1515             parseError();
   1516             m_token->setForceQuirks();
   1517             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1518         } else {
   1519             m_token->appendToSystemIdentifier(cc);
   1520             HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
   1521         }
   1522     }
   1523     END_STATE()
   1524 
   1525     HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
   1526         if (isTokenizerWhitespace(cc))
   1527             HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
   1528         else if (cc == '>')
   1529             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1530         else if (cc == kEndOfFileMarker) {
   1531             parseError();
   1532             m_token->setForceQuirks();
   1533             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1534         } else {
   1535             parseError();
   1536             HTML_ADVANCE_TO(BogusDOCTYPEState);
   1537         }
   1538     }
   1539     END_STATE()
   1540 
   1541     HTML_BEGIN_STATE(BogusDOCTYPEState) {
   1542         if (cc == '>')
   1543             return emitAndResumeIn(source, HTMLTokenizer::DataState);
   1544         else if (cc == kEndOfFileMarker)
   1545             return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
   1546         HTML_ADVANCE_TO(BogusDOCTYPEState);
   1547     }
   1548     END_STATE()
   1549 
   1550     HTML_BEGIN_STATE(CDATASectionState) {
   1551         if (cc == ']')
   1552             HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
   1553         else if (cc == kEndOfFileMarker)
   1554             HTML_RECONSUME_IN(DataState);
   1555         else {
   1556             bufferCharacter(cc);
   1557             HTML_ADVANCE_TO(CDATASectionState);
   1558         }
   1559     }
   1560     END_STATE()
   1561 
   1562     HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
   1563         if (cc == ']')
   1564             HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
   1565         else {
   1566             bufferCharacter(']');
   1567             HTML_RECONSUME_IN(CDATASectionState);
   1568         }
   1569     }
   1570 
   1571     HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
   1572         if (cc == '>')
   1573             HTML_ADVANCE_TO(DataState);
   1574         else {
   1575             bufferCharacter(']');
   1576             bufferCharacter(']');
   1577             HTML_RECONSUME_IN(CDATASectionState);
   1578         }
   1579     }
   1580     END_STATE()
   1581 
   1582     }
   1583 
   1584     ASSERT_NOT_REACHED();
   1585     return false;
   1586 }
   1587 
   1588 String HTMLTokenizer::bufferedCharacters() const
   1589 {
   1590     // FIXME: Add an assert about m_state.
   1591     StringBuilder characters;
   1592     characters.reserveCapacity(numberOfBufferedCharacters());
   1593     characters.append('<');
   1594     characters.append('/');
   1595     characters.append(m_temporaryBuffer.data(), m_temporaryBuffer.size());
   1596     return characters.toString();
   1597 }
   1598 
   1599 void HTMLTokenizer::updateStateFor(const AtomicString& tagName)
   1600 {
   1601     if (tagName == textareaTag || tagName == titleTag)
   1602         setState(HTMLTokenizer::RCDATAState);
   1603     else if (tagName == plaintextTag)
   1604         setState(HTMLTokenizer::PLAINTEXTState);
   1605     else if (tagName == scriptTag)
   1606         setState(HTMLTokenizer::ScriptDataState);
   1607     else if (tagName == styleTag
   1608         || tagName == iframeTag
   1609         || tagName == xmpTag
   1610         || (tagName == noembedTag && m_options.pluginsEnabled)
   1611         || tagName == noframesTag
   1612         || (tagName == noscriptTag && m_options.scriptEnabled))
   1613         setState(HTMLTokenizer::RAWTEXTState);
   1614 }
   1615 
   1616 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
   1617 {
   1618     return vectorEqualsString(m_temporaryBuffer, expectedString);
   1619 }
   1620 
   1621 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc)
   1622 {
   1623     ASSERT(isEndTagBufferingState(m_state));
   1624     m_bufferedEndTagName.append(cc);
   1625 }
   1626 
   1627 inline bool HTMLTokenizer::isAppropriateEndTag()
   1628 {
   1629     if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
   1630         return false;
   1631 
   1632     size_t numCharacters = m_bufferedEndTagName.size();
   1633 
   1634     for (size_t i = 0; i < numCharacters; i++) {
   1635         if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
   1636             return false;
   1637     }
   1638 
   1639     return true;
   1640 }
   1641 
   1642 inline void HTMLTokenizer::parseError()
   1643 {
   1644     notImplemented();
   1645 }
   1646 
   1647 }
   1648