Home | History | Annotate | Download | only in vtt
      1 /*
      2  * Copyright (C) 2011 Google Inc.  All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 
     33 #include "core/html/track/vtt/VTTTokenizer.h"
     34 
     35 #include "core/xml/parser/MarkupTokenizerInlines.h"
     36 #include "wtf/text/StringBuilder.h"
     37 #include "wtf/unicode/CharacterNames.h"
     38 
     39 namespace WebCore {
     40 
// Opens a tokenizer state. Expands to a `case` label for the enclosing switch
// and to a goto label of the same name, which WEBVTT_ADVANCE_TO jumps to.
#define WEBVTT_BEGIN_STATE(stateName) case stateName: stateName:
// Transitions to |stateName|: records it in `state`, advances the input through
// the preprocessor, loads the next input character into `cc`, and jumps to the
// state's label. The expansion site must have `state`, `cc`, `m_input` and
// `m_inputStreamPreprocessor` in scope. Because the macro ends in a goto, no
// statement following it in the same branch is executed.
#define WEBVTT_ADVANCE_TO(stateName)                               \
    do {                                                           \
        state = stateName;                                         \
        ASSERT(!m_input.isEmpty());                                \
        m_inputStreamPreprocessor.advance(m_input);                \
        cc = m_inputStreamPreprocessor.nextInputCharacter();       \
        goto stateName;                                            \
    } while (false)
     50 
     51 template<unsigned charactersCount>
     52 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)[charactersCount])
     53 {
     54     return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersCount - 1);
     55 }
     56 
     57 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass)
     58 {
     59     if (!classes.isEmpty())
     60         classes.append(' ');
     61     classes.append(newClass);
     62 }
     63 
     64 inline bool emitToken(VTTToken& resultToken, const VTTToken& token)
     65 {
     66     resultToken = token;
     67     return true;
     68 }
     69 
     70 inline bool advanceAndEmitToken(SegmentedString& source, VTTToken& resultToken, const VTTToken& token)
     71 {
     72     source.advanceAndUpdateLineNumber();
     73     return emitToken(resultToken, token);
     74 }
     75 
     76 VTTTokenizer::VTTTokenizer(const String& input)
     77     : m_input(input)
     78     , m_inputStreamPreprocessor(this)
     79 {
     80     // Append a EOF marker and close the input "stream".
     81     ASSERT(!m_input.isClosed());
     82     m_input.append(SegmentedString(String(&kEndOfFileMarker, 1)));
     83     m_input.close();
     84 }
     85 
// Extracts the next WebVTT cue text token from the input and stores it in |token|.
//
// Returns false, leaving |token| unassigned, when no token can be produced: the
// input is empty, the preprocessor cannot peek a character, or the next
// character is the end-of-file marker. Every other exit goes through
// emitToken()/advanceAndEmitToken() and returns true.
//
// The body is a goto-driven state machine. WEBVTT_BEGIN_STATE opens a state and
// WEBVTT_ADVANCE_TO advances the input by one character and jumps to the named
// state, so code written after a WEBVTT_ADVANCE_TO in the same branch never runs.
// NOTE(review): END_STATE() is not defined in this file; presumably it comes
// from MarkupTokenizerInlines.h - confirm.
bool VTTTokenizer::nextToken(VTTToken& token)
{
    if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input))
        return false;

    UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
    if (cc == kEndOfFileMarker) {
        // Only the EOF marker is left: consume it and report that there are no more tokens.
        m_inputStreamPreprocessor.advance(m_input);
        return false;
    }

    // Scratch text for the piece currently being read: a pending escape
    // sequence, a class name, or a tag annotation, depending on the state.
    StringBuilder buffer;
    // Text of a string token, or the name/content of a tag token.
    StringBuilder result;
    // Class names collected for a start tag, joined with spaces by addNewClass().
    StringBuilder classes;
    enum {
        DataState,
        EscapeState,
        TagState,
        StartTagState,
        StartTagClassState,
        StartTagAnnotationState,
        EndTagState,
        TimestampTagState,
    } state = DataState;

    // 4.8.10.13.4 WebVTT cue text tokenizer
    switch (state) {
        // Plain text: accumulate characters in |result| until a tag, an escape or EOF.
        WEBVTT_BEGIN_STATE(DataState) {
            if (cc == '&') {
                // Start collecting a possible escape sequence, '&' included.
                buffer.append(static_cast<LChar>(cc));
                WEBVTT_ADVANCE_TO(EscapeState);
            } else if (cc == '<') {
                if (result.isEmpty()) {
                    WEBVTT_ADVANCE_TO(TagState);
                } else {
                    // We don't want to advance input or perform a state transition - just return a (new) token.
                    // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.)
                    return emitToken(token, VTTToken::StringToken(result.toString()));
                }
            } else if (cc == kEndOfFileMarker) {
                return advanceAndEmitToken(m_input, token, VTTToken::StringToken(result.toString()));
            } else {
                result.append(cc);
                WEBVTT_ADVANCE_TO(DataState);
            }
        }
        END_STATE()

        // Inside "&...": |buffer| holds the '&' plus the alphanumerics read so far.
        WEBVTT_BEGIN_STATE(EscapeState) {
            if (cc == ';') {
                // Replace a recognized escape by its character; otherwise keep
                // the collected text verbatim, including the terminating ';'.
                if (equalLiteral(buffer, "&amp")) {
                    result.append('&');
                } else if (equalLiteral(buffer, "&lt")) {
                    result.append('<');
                } else if (equalLiteral(buffer, "&gt")) {
                    result.append('>');
                } else if (equalLiteral(buffer, "&lrm")) {
                    result.append(leftToRightMark);
                } else if (equalLiteral(buffer, "&rlm")) {
                    result.append(rightToLeftMark);
                } else if (equalLiteral(buffer, "&nbsp")) {
                    result.append(noBreakSpace);
                } else {
                    buffer.append(static_cast<LChar>(cc));
                    result.append(buffer);
                }
                buffer.clear();
                WEBVTT_ADVANCE_TO(DataState);
            } else if (isASCIIAlphanumeric(cc)) {
                buffer.append(static_cast<LChar>(cc));
                WEBVTT_ADVANCE_TO(EscapeState);
            } else if (cc == '<') {
                // Unterminated escape followed by a tag: emit the text as-is and
                // leave '<' unconsumed so the next call starts in the tag path.
                result.append(buffer);
                return emitToken(token, VTTToken::StringToken(result.toString()));
            } else if (cc == kEndOfFileMarker) {
                result.append(buffer);
                return advanceAndEmitToken(m_input, token, VTTToken::StringToken(result.toString()));
            } else {
                // Not an escape after all: flush the collected text unchanged.
                result.append(buffer);
                buffer.clear();

                if (cc == '&') {
                    // A new '&' starts another candidate escape. WEBVTT_ADVANCE_TO
                    // jumps away, so the two statements below are skipped in this case.
                    buffer.append(static_cast<LChar>(cc));
                    WEBVTT_ADVANCE_TO(EscapeState);
                }
                result.append(cc);
                WEBVTT_ADVANCE_TO(DataState);
            }
        }
        END_STATE()

        // Just after '<': the first character decides which kind of tag follows.
        WEBVTT_BEGIN_STATE(TagState) {
            if (isTokenizerWhitespace(cc)) {
                ASSERT(result.isEmpty());
                WEBVTT_ADVANCE_TO(StartTagAnnotationState);
            } else if (cc == '.') {
                ASSERT(result.isEmpty());
                WEBVTT_ADVANCE_TO(StartTagClassState);
            } else if (cc == '/') {
                WEBVTT_ADVANCE_TO(EndTagState);
            } else if (WTF::isASCIIDigit(cc)) {
                // A leading digit is treated as the start of a timestamp tag.
                result.append(cc);
                WEBVTT_ADVANCE_TO(TimestampTagState);
            } else if (cc == '>' || cc == kEndOfFileMarker) {
                // "<>" or '<' at end of input: emit a start tag with an empty name.
                ASSERT(result.isEmpty());
                return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString()));
            } else {
                result.append(cc);
                WEBVTT_ADVANCE_TO(StartTagState);
            }
        }
        END_STATE()

        // Reading the start tag name into |result|.
        WEBVTT_BEGIN_STATE(StartTagState) {
            if (isTokenizerWhitespace(cc)) {
                WEBVTT_ADVANCE_TO(StartTagAnnotationState);
            } else if (cc == '.') {
                WEBVTT_ADVANCE_TO(StartTagClassState);
            } else if (cc == '>' || cc == kEndOfFileMarker) {
                return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString()));
            } else {
                result.append(cc);
                WEBVTT_ADVANCE_TO(StartTagState);
            }
        }
        END_STATE()

        // Reading one ".class" component into |buffer|; each completed name is
        // moved into |classes| before the next component, the annotation or the end of the tag.
        WEBVTT_BEGIN_STATE(StartTagClassState) {
            if (isTokenizerWhitespace(cc)) {
                addNewClass(classes, buffer);
                buffer.clear();
                WEBVTT_ADVANCE_TO(StartTagAnnotationState);
            } else if (cc == '.') {
                addNewClass(classes, buffer);
                buffer.clear();
                WEBVTT_ADVANCE_TO(StartTagClassState);
            } else if (cc == '>' || cc == kEndOfFileMarker) {
                addNewClass(classes, buffer);
                buffer.clear();
                return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString(), classes.toAtomicString()));
            } else {
                buffer.append(cc);
                WEBVTT_ADVANCE_TO(StartTagClassState);
            }
        }
        END_STATE()

        // Reading the annotation text into |buffer| until '>' or EOF.
        WEBVTT_BEGIN_STATE(StartTagAnnotationState) {
            if (cc == '>' || cc == kEndOfFileMarker) {
                return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString(), classes.toAtomicString(), buffer.toAtomicString()));
            }
            buffer.append(cc);
            WEBVTT_ADVANCE_TO(StartTagAnnotationState);
        }
        END_STATE()

        // After "</": reading the end tag name into |result| until '>' or EOF.
        WEBVTT_BEGIN_STATE(EndTagState) {
            if (cc == '>' || cc == kEndOfFileMarker)
                return advanceAndEmitToken(m_input, token, VTTToken::EndTag(result.toString()));
            result.append(cc);
            WEBVTT_ADVANCE_TO(EndTagState);
        }
        END_STATE()

        // Reading the timestamp text into |result| until '>' or EOF.
        WEBVTT_BEGIN_STATE(TimestampTagState) {
            if (cc == '>' || cc == kEndOfFileMarker)
                return advanceAndEmitToken(m_input, token, VTTToken::TimestampTag(result.toString()));
            result.append(cc);
            WEBVTT_ADVANCE_TO(TimestampTagState);
        }
        END_STATE()

    }

    // Every state either returns a token or jumps to another state, so control
    // is not expected to leave the switch.
    ASSERT_NOT_REACHED();
    return false;
}
    263 
    264 }
    265 
    266