Home | History | Annotate | Download | only in track
      1 /*
      2  * Copyright (C) 2011 Google Inc.  All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "core/html/track/WebVTTParser.h"
     33 
     34 #include "core/dom/ProcessingInstruction.h"
     35 #include "core/dom/Text.h"
     36 #include "core/html/track/WebVTTElement.h"
     37 #include "core/platform/text/SegmentedString.h"
     38 #include "wtf/text/WTFString.h"
     39 
     40 namespace WebCore {
     41 
     42 const double secondsPerHour = 3600;
     43 const double secondsPerMinute = 60;
     44 const double secondsPerMillisecond = 0.001;
     45 const double malformedTime = -1;
     46 const unsigned bomLength = 3;
     47 const unsigned fileIdentifierLength = 6;
     48 
     49 String WebVTTParser::collectDigits(const String& input, unsigned* position)
     50 {
     51     StringBuilder digits;
     52     while (*position < input.length() && isASCIIDigit(input[*position]))
     53         digits.append(input[(*position)++]);
     54     return digits.toString();
     55 }
     56 
     57 String WebVTTParser::collectWord(const String& input, unsigned* position)
     58 {
     59     StringBuilder string;
     60     while (*position < input.length() && !isASpace(input[*position]))
     61         string.append(input[(*position)++]);
     62     return string.toString();
     63 }
     64 
     65 #if ENABLE(WEBVTT_REGIONS)
     66 float WebVTTParser::parseFloatPercentageValue(const String& value, bool& isValidSetting)
     67 {
     68     // '%' must be present and at the end of the setting value.
     69     if (value.find('%', 1) != value.length() - 1) {
     70         isValidSetting = false;
     71         return 0;
     72     }
     73 
     74     unsigned position = 0;
     75 
     76     StringBuilder floatNumberAsString;
     77     floatNumberAsString.append(WebVTTParser::collectDigits(value, &position));
     78 
     79     if (value[position] == '.') {
     80         floatNumberAsString.append(".");
     81         position++;
     82 
     83         floatNumberAsString.append(WebVTTParser::collectDigits(value, &position));
     84     }
     85     float number = floatNumberAsString.toString().toFloat(&isValidSetting);
     86 
     87     if (isValidSetting && (number <= 0 || number >= 100))
     88         isValidSetting = false;
     89 
     90     return number;
     91 }
     92 
     93 FloatPoint WebVTTParser::parseFloatPercentageValuePair(const String& value, char delimiter, bool& isValidSetting)
     94 {
     95     // The delimiter can't be the first or second value because a pair of
     96     // percentages (x%,y%) implies that at least the first two characters
     97     // are the first percentage value.
     98     size_t delimiterOffset = value.find(delimiter, 2);
     99     if (delimiterOffset == notFound || delimiterOffset == value.length() - 1) {
    100         isValidSetting = false;
    101         return FloatPoint(0, 0);
    102     }
    103 
    104     bool isFirstValueValid;
    105     float firstCoord = parseFloatPercentageValue(value.substring(0, delimiterOffset), isFirstValueValid);
    106 
    107     bool isSecondValueValid;
    108     float secondCoord = parseFloatPercentageValue(value.substring(delimiterOffset + 1, value.length() - 1), isSecondValueValid);
    109 
    110     isValidSetting = isFirstValueValid && isSecondValueValid;
    111     return FloatPoint(firstCoord, secondCoord);
    112 }
    113 #endif
    114 
// Creates a parser in the Initial state with zeroed cue start/end times and
// a freshly created tokenizer.
//
// client  - receives parse notifications; every use in this file is guarded
//           by a null check, so it may be null.
// context - stored and later passed to the cues and regions this parser
//           creates; createDocumentFragmentFromCueText() asserts that it is
//           a Document.
WebVTTParser::WebVTTParser(WebVTTParserClient* client, ScriptExecutionContext* context)
    : m_scriptExecutionContext(context)
    , m_state(Initial)
    , m_currentStartTime(0)
    , m_currentEndTime(0)
    , m_tokenizer(WebVTTTokenizer::create())
    , m_client(client)
{
}
    124 
// Hands the cues accumulated so far to the caller: |outputCues| is
// overwritten with the pending cue list, which is then emptied so the same
// cues are not reported twice.
void WebVTTParser::getNewCues(Vector<RefPtr<TextTrackCue> >& outputCues)
{
    outputCues = m_cuelist;
    m_cuelist.clear();
}
    130 
    131 #if ENABLE(WEBVTT_REGIONS)
// Hands the regions accumulated so far to the caller: |outputRegions| is
// overwritten with the pending region list, which is then emptied.
void WebVTTParser::getNewRegions(Vector<RefPtr<TextTrackRegion> >& outputRegions)
{
    outputRegions = m_regionList;
    m_regionList.clear();
}
    137 #endif
    138 
    139 void WebVTTParser::parseBytes(const char* data, unsigned length)
    140 {
    141     // 4.8.10.13.3 WHATWG WebVTT Parser algorithm.
    142     // 1-3 - Initial setup.
    143     unsigned position = 0;
    144 
    145     while (position < length) {
    146         String line = collectNextLine(data, length, &position);
    147 
    148         switch (m_state) {
    149         case Initial:
    150             // Buffer up at least 9 bytes before proceeding with checking for the file identifier.
    151             m_identifierData.append(data, length);
    152             if (m_identifierData.size() < bomLength + fileIdentifierLength)
    153                 return;
    154 
    155             // 4-12 - Collect the first line and check for "WEBVTT".
    156             if (!hasRequiredFileIdentifier()) {
    157                 if (m_client)
    158                     m_client->fileFailedToParse();
    159                 return;
    160             }
    161 
    162             m_state = Header;
    163             m_identifierData.clear();
    164             break;
    165 
    166         case Header:
    167             // 13-18 - Allow a header (comment area) under the WEBVTT line.
    168 #if ENABLE(WEBVTT_REGIONS)
    169             if (line.isEmpty()) {
    170                 if (m_client && m_regionList.size())
    171                     m_client->newRegionsParsed();
    172 
    173                 m_state = Id;
    174                 break;
    175             }
    176             collectHeader(line);
    177 
    178             break;
    179 
    180         case Metadata:
    181 #endif
    182             if (line.isEmpty())
    183                 m_state = Id;
    184             break;
    185 
    186         case Id:
    187             // 19-29 - Allow any number of line terminators, then initialize new cue values.
    188             if (line.isEmpty())
    189                 break;
    190             resetCueValues();
    191 
    192             // 30-39 - Check if this line contains an optional identifier or timing data.
    193             m_state = collectCueId(line);
    194             break;
    195 
    196         case TimingsAndSettings:
    197             // 40 - Collect cue timings and settings.
    198             m_state = collectTimingsAndSettings(line);
    199             break;
    200 
    201         case CueText:
    202             // 41-53 - Collect the cue text, create a cue, and add it to the output.
    203             m_state = collectCueText(line, length, position);
    204             break;
    205 
    206         case BadCue:
    207             // 54-62 - Collect and discard the remaining cue.
    208             m_state = ignoreBadCue(line);
    209             break;
    210         }
    211     }
    212 }
    213 
    214 bool WebVTTParser::hasRequiredFileIdentifier()
    215 {
    216     // A WebVTT file identifier consists of an optional BOM character,
    217     // the string "WEBVTT" followed by an optional space or tab character,
    218     // and any number of characters that are not line terminators ...
    219     unsigned position = 0;
    220     if (m_identifierData.size() >= bomLength && m_identifierData[0] == '\xEF' && m_identifierData[1] == '\xBB' && m_identifierData[2] == '\xBF')
    221         position += bomLength;
    222     String line = collectNextLine(m_identifierData.data(), m_identifierData.size(), &position);
    223 
    224     if (line.length() < fileIdentifierLength)
    225         return false;
    226     if (line.substring(0, fileIdentifierLength) != "WEBVTT")
    227         return false;
    228     if (line.length() > fileIdentifierLength && line[fileIdentifierLength] != ' ' && line[fileIdentifierLength] != '\t')
    229         return false;
    230 
    231     return true;
    232 }
    233 
    234 #if ENABLE(WEBVTT_REGIONS)
    235 void WebVTTParser::collectHeader(const String& line)
    236 {
    237     // 4.1 Extension of WebVTT header parsing (11 - 15)
    238     DEFINE_STATIC_LOCAL(const AtomicString, regionHeaderName, ("Region", AtomicString::ConstructFromLiteral));
    239 
    240     // 15.4 If line contains the character ":" (A U+003A COLON), then set metadata's
    241     // name to the substring of line before the first ":" character and
    242     // metadata's value to the substring after this character.
    243     if (!line.contains(":"))
    244         return;
    245 
    246     unsigned colonPosition = line.find(":");
    247     m_currentHeaderName = line.substring(0, colonPosition);
    248 
    249     // 15.5 If metadata's name equals "Region":
    250     if (m_currentHeaderName == regionHeaderName) {
    251         m_currentHeaderValue = line.substring(colonPosition + 1, line.length() - 1);
    252         // 15.5.1 - 15.5.8 Region creation: Let region be a new text track region [...]
    253         createNewRegion();
    254     }
    255 }
    256 #endif
    257 
    258 WebVTTParser::ParseState WebVTTParser::collectCueId(const String& line)
    259 {
    260     if (line.contains("-->"))
    261         return collectTimingsAndSettings(line);
    262     m_currentId = line;
    263     return TimingsAndSettings;
    264 }
    265 
    266 WebVTTParser::ParseState WebVTTParser::collectTimingsAndSettings(const String& line)
    267 {
    268     // 4.8.10.13.3 Collect WebVTT cue timings and settings.
    269     // 1-3 - Let input be the string being parsed and position be a pointer into input
    270     unsigned position = 0;
    271     skipWhiteSpace(line, &position);
    272 
    273     // 4-5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time.
    274     m_currentStartTime = collectTimeStamp(line, &position);
    275     if (m_currentStartTime == malformedTime)
    276         return BadCue;
    277     if (position >= line.length())
    278         return BadCue;
    279 
    280     skipWhiteSpace(line, &position);
    281 
    282     // 6-9 - If the next three characters are not "-->", abort and return failure.
    283     if (line.find("-->", position) == notFound)
    284         return BadCue;
    285     position += 3;
    286     if (position >= line.length())
    287         return BadCue;
    288 
    289     skipWhiteSpace(line, &position);
    290 
    291     // 10-11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time.
    292     m_currentEndTime = collectTimeStamp(line, &position);
    293     if (m_currentEndTime == malformedTime)
    294         return BadCue;
    295     skipWhiteSpace(line, &position);
    296 
    297     // 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue).
    298     m_currentSettings = line.substring(position, line.length()-1);
    299     return CueText;
    300 }
    301 
    302 WebVTTParser::ParseState WebVTTParser::collectCueText(const String& line, unsigned length, unsigned position)
    303 {
    304     if (line.isEmpty()) {
    305         createNewCue();
    306         return Id;
    307     }
    308     if (!m_currentContent.isEmpty())
    309         m_currentContent.append("\n");
    310     m_currentContent.append(line);
    311 
    312     if (position >= length)
    313         createNewCue();
    314 
    315     return CueText;
    316 }
    317 
    318 WebVTTParser::ParseState WebVTTParser::ignoreBadCue(const String& line)
    319 {
    320     if (!line.isEmpty())
    321         return BadCue;
    322     return Id;
    323 }
    324 
    325 PassRefPtr<DocumentFragment>  WebVTTParser::createDocumentFragmentFromCueText(const String& text)
    326 {
    327     // Cue text processing based on
    328     // 4.8.10.13.4 WebVTT cue text parsing rules and
    329     // 4.8.10.13.5 WebVTT cue text DOM construction rules.
    330 
    331     ASSERT(m_scriptExecutionContext->isDocument());
    332     Document* document = toDocument(m_scriptExecutionContext);
    333 
    334     RefPtr<DocumentFragment> fragment = DocumentFragment::create(document);
    335 
    336     if (!text.length()) {
    337         fragment->parserAppendChild(Text::create(document, ""));
    338         return fragment;
    339     }
    340 
    341     m_currentNode = fragment;
    342     m_tokenizer->reset();
    343     m_token.clear();
    344 
    345     m_languageStack.clear();
    346     SegmentedString content(text);
    347     while (m_tokenizer->nextToken(content, m_token))
    348         constructTreeFromToken(document);
    349 
    350     return fragment.release();
    351 }
    352 
    353 void WebVTTParser::createNewCue()
    354 {
    355     if (!m_currentContent.length())
    356         return;
    357 
    358     RefPtr<TextTrackCue> cue = TextTrackCue::create(m_scriptExecutionContext, m_currentStartTime, m_currentEndTime, m_currentContent.toString());
    359     cue->setId(m_currentId);
    360     cue->setCueSettings(m_currentSettings);
    361 
    362     m_cuelist.append(cue);
    363     if (m_client)
    364         m_client->newCuesParsed();
    365 }
    366 
    367 void WebVTTParser::resetCueValues()
    368 {
    369     m_currentId = emptyString();
    370     m_currentSettings = emptyString();
    371     m_currentStartTime = 0;
    372     m_currentEndTime = 0;
    373     m_currentContent.clear();
    374 }
    375 
    376 #if ENABLE(WEBVTT_REGIONS)
    377 void WebVTTParser::createNewRegion()
    378 {
    379     if (!m_currentHeaderValue.length())
    380         return;
    381 
    382     RefPtr<TextTrackRegion> region = TextTrackRegion::create(m_scriptExecutionContext);
    383     region->setRegionSettings(m_currentHeaderValue);
    384 
    385     // 15.5.10 If the text track list of regions regions contains a region
    386     // with the same region identifier value as region, remove that region.
    387     for (size_t i = 0; i < m_regionList.size(); ++i)
    388         if (m_regionList[i]->id() == region->id()) {
    389             m_regionList.remove(i);
    390             break;
    391         }
    392 
    393     m_regionList.append(region);
    394 }
    395 #endif
    396 
    397 double WebVTTParser::collectTimeStamp(const String& line, unsigned* position)
    398 {
    399     // 4.8.10.13.3 Collect a WebVTT timestamp.
    400     // 1-4 - Initial checks, let most significant units be minutes.
    401     enum Mode { minutes, hours };
    402     Mode mode = minutes;
    403     if (*position >= line.length() || !isASCIIDigit(line[*position]))
    404         return malformedTime;
    405 
    406     // 5-6 - Collect a sequence of characters that are 0-9.
    407     String digits1 = collectDigits(line, position);
    408     int value1 = digits1.toInt();
    409 
    410     // 7 - If not 2 characters or value is greater than 59, interpret as hours.
    411     if (digits1.length() != 2 || value1 > 59)
    412         mode = hours;
    413 
    414     // 8-12 - Collect the next sequence of 0-9 after ':' (must be 2 chars).
    415     if (*position >= line.length() || line[(*position)++] != ':')
    416         return malformedTime;
    417     if (*position >= line.length() || !isASCIIDigit(line[(*position)]))
    418         return malformedTime;
    419     String digits2 = collectDigits(line, position);
    420     int value2 = digits2.toInt();
    421     if (digits2.length() != 2)
    422         return malformedTime;
    423 
    424     // 13 - Detect whether this timestamp includes hours.
    425     int value3;
    426     if (mode == hours || (*position < line.length() && line[*position] == ':')) {
    427         if (*position >= line.length() || line[(*position)++] != ':')
    428             return malformedTime;
    429         if (*position >= line.length() || !isASCIIDigit(line[*position]))
    430             return malformedTime;
    431         String digits3 = collectDigits(line, position);
    432         if (digits3.length() != 2)
    433             return malformedTime;
    434         value3 = digits3.toInt();
    435     } else {
    436         value3 = value2;
    437         value2 = value1;
    438         value1 = 0;
    439     }
    440 
    441     // 14-19 - Collect next sequence of 0-9 after '.' (must be 3 chars).
    442     if (*position >= line.length() || line[(*position)++] != '.')
    443         return malformedTime;
    444     if (*position >= line.length() || !isASCIIDigit(line[*position]))
    445         return malformedTime;
    446     String digits4 = collectDigits(line, position);
    447     if (digits4.length() != 3)
    448         return malformedTime;
    449     int value4 = digits4.toInt();
    450     if (value2 > 59 || value3 > 59)
    451         return malformedTime;
    452 
    453     // 20-21 - Calculate result.
    454     return (value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond);
    455 }
    456 
    457 static WebVTTNodeType tokenToNodeType(WebVTTToken& token)
    458 {
    459     switch (token.name().size()) {
    460     case 1:
    461         if (token.name()[0] == 'c')
    462             return WebVTTNodeTypeClass;
    463         if (token.name()[0] == 'v')
    464             return WebVTTNodeTypeVoice;
    465         if (token.name()[0] == 'b')
    466             return WebVTTNodeTypeBold;
    467         if (token.name()[0] == 'i')
    468             return WebVTTNodeTypeItalic;
    469         if (token.name()[0] == 'u')
    470             return WebVTTNodeTypeUnderline;
    471         break;
    472     case 2:
    473         if (token.name()[0] == 'r' && token.name()[1] == 't')
    474             return WebVTTNodeTypeRubyText;
    475         break;
    476     case 4:
    477         if (token.name()[0] == 'r' && token.name()[1] == 'u' && token.name()[2] == 'b' && token.name()[3] == 'y')
    478             return WebVTTNodeTypeRuby;
    479         if (token.name()[0] == 'l' && token.name()[1] == 'a' && token.name()[2] == 'n' && token.name()[3] == 'g')
    480             return WebVTTNodeTypeLanguage;
    481         break;
    482     }
    483     return WebVTTNodeTypeNone;
    484 }
    485 
    486 void WebVTTParser::constructTreeFromToken(Document* document)
    487 {
    488     QualifiedName tagName(nullAtom, AtomicString(m_token.name()), xhtmlNamespaceURI);
    489 
    490     // http://dev.w3.org/html5/webvtt/#webvtt-cue-text-dom-construction-rules
    491 
    492     switch (m_token.type()) {
    493     case WebVTTTokenTypes::Character: {
    494         String content(m_token.characters()); // FIXME: This should be 8bit if possible.
    495         RefPtr<Text> child = Text::create(document, content);
    496         m_currentNode->parserAppendChild(child);
    497         break;
    498     }
    499     case WebVTTTokenTypes::StartTag: {
    500         RefPtr<WebVTTElement> child;
    501         WebVTTNodeType nodeType = tokenToNodeType(m_token);
    502         if (nodeType != WebVTTNodeTypeNone)
    503             child = WebVTTElement::create(nodeType, document);
    504         if (child) {
    505             if (m_token.classes().size() > 0)
    506                 child->setAttribute(classAttr, AtomicString(m_token.classes()));
    507 
    508             if (child->webVTTNodeType() == WebVTTNodeTypeVoice)
    509                 child->setAttribute(WebVTTElement::voiceAttributeName(), AtomicString(m_token.annotation()));
    510             else if (child->webVTTNodeType() == WebVTTNodeTypeLanguage) {
    511                 m_languageStack.append(AtomicString(m_token.annotation()));
    512                 child->setAttribute(WebVTTElement::langAttributeName(), m_languageStack.last());
    513             }
    514             if (!m_languageStack.isEmpty())
    515                 child->setLanguage(m_languageStack.last());
    516             m_currentNode->parserAppendChild(child);
    517             m_currentNode = child;
    518         }
    519         break;
    520     }
    521     case WebVTTTokenTypes::EndTag: {
    522         WebVTTNodeType nodeType = tokenToNodeType(m_token);
    523         if (nodeType != WebVTTNodeTypeNone) {
    524             if (nodeType == WebVTTNodeTypeLanguage && m_currentNode->isWebVTTElement() && toWebVTTElement(m_currentNode.get())->webVTTNodeType() == WebVTTNodeTypeLanguage)
    525                 m_languageStack.removeLast();
    526             if (m_currentNode->parentNode())
    527                 m_currentNode = m_currentNode->parentNode();
    528         }
    529         break;
    530     }
    531     case WebVTTTokenTypes::TimestampTag: {
    532         unsigned position = 0;
    533         String charactersString(StringImpl::create8BitIfPossible(m_token.characters()));
    534         double time = collectTimeStamp(charactersString, &position);
    535         if (time != malformedTime)
    536             m_currentNode->parserAppendChild(ProcessingInstruction::create(document, "timestamp", charactersString));
    537         break;
    538     }
    539     default:
    540         break;
    541     }
    542     m_token.clear();
    543 }
    544 
    545 void WebVTTParser::skipWhiteSpace(const String& line, unsigned* position)
    546 {
    547     while (*position < line.length() && isASpace(line[*position]))
    548         (*position)++;
    549 }
    550 
    551 void WebVTTParser::skipLineTerminator(const char* data, unsigned length, unsigned* position)
    552 {
    553     if (*position >= length)
    554         return;
    555     if (data[*position] == '\r')
    556         (*position)++;
    557     if (*position >= length)
    558         return;
    559     if (data[*position] == '\n')
    560         (*position)++;
    561 }
    562 
    563 String WebVTTParser::collectNextLine(const char* data, unsigned length, unsigned* position)
    564 {
    565     unsigned oldPosition = *position;
    566     while (*position < length && data[*position] != '\r' && data[*position] != '\n')
    567         (*position)++;
    568     String line = String::fromUTF8(data + oldPosition, *position - oldPosition);
    569     skipLineTerminator(data, length, position);
    570     return line;
    571 }
    572 
    573 }
    574 
    575