Home | History | Annotate | Download | only in vtt
      1 /*
      2  * Copyright (C) 2011 Google Inc.  All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "core/html/track/vtt/VTTParser.h"
     33 
     34 #include "core/dom/Document.h"
     35 #include "core/dom/ProcessingInstruction.h"
     36 #include "core/dom/Text.h"
     37 #include "core/html/track/vtt/VTTElement.h"
     38 #include "platform/text/SegmentedString.h"
     39 #include "wtf/text/WTFString.h"
     40 
     41 namespace WebCore {
     42 
     // Factors used by collectTimeStamp() to convert the hour, minute and
     // millisecond fields of a timestamp into seconds.
     const double secondsPerHour = 3600;
     const double secondsPerMinute = 60;
     const double secondsPerMillisecond = 0.001;

     // Number of characters in the "WEBVTT" file signature checked by
     // hasRequiredFileIdentifier().
     const unsigned fileIdentifierLength = 6;
     47 
     48 static unsigned scanDigits(const String& input, unsigned* position)
     49 {
     50     unsigned startPosition = *position;
     51     while (*position < input.length() && isASCIIDigit(input[*position]))
     52         (*position)++;
     53     return *position - startPosition;
     54 }
     55 
     56 unsigned VTTParser::collectDigitsToInt(const String& input, unsigned* position, int& number)
     57 {
     58     unsigned startPosition = *position;
     59     unsigned numDigits = scanDigits(input, position);
     60     if (!numDigits) {
     61         number = 0;
     62         return 0;
     63     }
     64     bool validNumber;
     65     if (input.is8Bit())
     66         number = charactersToInt(input.characters8() + startPosition, numDigits, &validNumber);
     67     else
     68         number = charactersToInt(input.characters16() + startPosition, numDigits, &validNumber);
     69 
     70     // Since we know that scanDigits only scanned valid (ASCII) digits (and
     71     // hence that's what got passed to charactersToInt()), the remaining
     72     // failure mode for charactersToInt() is overflow, so if |validNumber| is
     73     // not true, then set |number| to the maximum int value.
     74     if (!validNumber)
     75         number = std::numeric_limits<int>::max();
     76     return numDigits;
     77 }
     78 
     79 String VTTParser::collectWord(const String& input, unsigned* position)
     80 {
     81     StringBuilder string;
     82     while (*position < input.length() && !isASpace(input[*position]))
     83         string.append(input[(*position)++]);
     84     return string.toString();
     85 }
     86 
     87 void VTTParser::skipWhiteSpace(const String& line, unsigned* position)
     88 {
     89     while (*position < line.length() && isASpace(line[*position]))
     90         (*position)++;
     91 }
     92 
     93 bool VTTParser::parseFloatPercentageValue(const String& value, float& percentage)
     94 {
     95     // '%' must be present and at the end of the setting value.
     96     if (value.isEmpty() || value[value.length() - 1] != '%')
     97         return false;
     98 
     99     unsigned position = 0;
    100     unsigned digitsBeforeDot = scanDigits(value, &position);
    101     unsigned digitsAfterDot = 0;
    102     if (value[position] == '.') {
    103         position++;
    104 
    105         digitsAfterDot = scanDigits(value, &position);
    106     }
    107 
    108     // At least one digit required.
    109     if (!digitsBeforeDot && !digitsAfterDot)
    110         return false;
    111 
    112     float number = value.toFloat();
    113     if (number < 0 || number > 100)
    114         return false;
    115 
    116     percentage = number;
    117     return true;
    118 }
    119 
    120 bool VTTParser::parseFloatPercentageValuePair(const String& value, char delimiter, FloatPoint& valuePair)
    121 {
    122     // The delimiter can't be the first or second value because a pair of
    123     // percentages (x%,y%) implies that at least the first two characters
    124     // are the first percentage value.
    125     size_t delimiterOffset = value.find(delimiter, 2);
    126     if (delimiterOffset == kNotFound || delimiterOffset == value.length() - 1)
    127         return false;
    128 
    129     float firstCoord;
    130     if (!parseFloatPercentageValue(value.substring(0, delimiterOffset), firstCoord))
    131         return false;
    132 
    133     float secondCoord;
    134     if (!parseFloatPercentageValue(value.substring(delimiterOffset + 1, value.length() - 1), secondCoord))
    135         return false;
    136 
    137     valuePair = FloatPoint(firstCoord, secondCoord);
    138     return true;
    139 }
    140 
    141 VTTParser::VTTParser(VTTParserClient* client, Document& document)
    142     : m_document(&document)
    143     , m_state(Initial)
    144     , m_decoder(TextResourceDecoder::create("text/plain", UTF8Encoding()))
    145     , m_currentStartTime(0)
    146     , m_currentEndTime(0)
    147     , m_client(client)
    148 {
    149 }
    150 
    151 void VTTParser::getNewCues(Vector<RefPtr<VTTCue> >& outputCues)
    152 {
    153     outputCues = m_cuelist;
    154     m_cuelist.clear();
    155 }
    156 
    157 void VTTParser::getNewRegions(Vector<RefPtr<VTTRegion> >& outputRegions)
    158 {
    159     outputRegions = m_regionList;
    160     m_regionList.clear();
    161 }
    162 
    163 void VTTParser::parseBytes(const char* data, unsigned length)
    164 {
    165     String textData = m_decoder->decode(data, length);
    166     m_lineReader.append(textData);
    167     parse();
    168 }
    169 
    170 void VTTParser::flush()
    171 {
    172     String textData = m_decoder->flush();
    173     m_lineReader.append(textData);
    174     m_lineReader.setEndOfStream();
    175     parse();
    176     flushPendingCue();
    177 }
    178 
    179 void VTTParser::parse()
    180 {
    181     // WebVTT parser algorithm. (5.1 WebVTT file parsing.)
    182     // Steps 1 - 3 - Initial setup.
    183 
    184     String line;
    185     while (m_lineReader.getLine(line)) {
    186         switch (m_state) {
    187         case Initial:
    188             // Steps 4 - 9 - Check for a valid WebVTT signature.
    189             if (!hasRequiredFileIdentifier(line)) {
    190                 if (m_client)
    191                     m_client->fileFailedToParse();
    192                 return;
    193             }
    194 
    195             m_state = Header;
    196             break;
    197 
    198         case Header:
    199             // Steps 10 - 14 - Allow a header (comment area) under the WEBVTT line.
    200             collectMetadataHeader(line);
    201 
    202             if (line.isEmpty()) {
    203                 if (m_client && m_regionList.size())
    204                     m_client->newRegionsParsed();
    205 
    206                 m_state = Id;
    207                 break;
    208             }
    209 
    210             // Step 15 - Break out of header loop if the line could be a timestamp line.
    211             if (line.contains("-->"))
    212                 m_state = recoverCue(line);
    213 
    214             // Step 16 - Line is not the empty string and does not contain "-->".
    215             break;
    216 
    217         case Id:
    218             // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values.
    219             if (line.isEmpty())
    220                 break;
    221 
    222             // Step 21 - Cue creation (start a new cue).
    223             resetCueValues();
    224 
    225             // Steps 22 - 25 - Check if this line contains an optional identifier or timing data.
    226             m_state = collectCueId(line);
    227             break;
    228 
    229         case TimingsAndSettings:
    230             // Steps 26 - 27 - Discard current cue if the line is empty.
    231             if (line.isEmpty()) {
    232                 m_state = Id;
    233                 break;
    234             }
    235 
    236             // Steps 28 - 29 - Collect cue timings and settings.
    237             m_state = collectTimingsAndSettings(line);
    238             break;
    239 
    240         case CueText:
    241             // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output.
    242             m_state = collectCueText(line);
    243             break;
    244 
    245         case BadCue:
    246             // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen.
    247             m_state = ignoreBadCue(line);
    248             break;
    249         }
    250     }
    251 }
    252 
    253 void VTTParser::flushPendingCue()
    254 {
    255     ASSERT(m_lineReader.isAtEndOfStream());
    256     // If we're in the CueText state when we run out of data, we emit the pending cue.
    257     if (m_state == CueText)
    258         createNewCue();
    259 }
    260 
    261 bool VTTParser::hasRequiredFileIdentifier(const String& line)
    262 {
    263     // A WebVTT file identifier consists of an optional BOM character,
    264     // the string "WEBVTT" followed by an optional space or tab character,
    265     // and any number of characters that are not line terminators ...
    266     if (!line.startsWith("WEBVTT", fileIdentifierLength))
    267         return false;
    268     if (line.length() > fileIdentifierLength && !isASpace(line[fileIdentifierLength]))
    269         return false;
    270 
    271     return true;
    272 }
    273 
    274 void VTTParser::collectMetadataHeader(const String& line)
    275 {
    276     // WebVTT header parsing (WebVTT parser algorithm step 12)
    277     DEFINE_STATIC_LOCAL(const AtomicString, regionHeaderName, ("Region", AtomicString::ConstructFromLiteral));
    278 
    279     // The only currently supported header is the "Region" header.
    280     if (!RuntimeEnabledFeatures::webVTTRegionsEnabled())
    281         return;
    282 
    283     // Step 12.4 If line contains the character ":" (A U+003A COLON), then set metadata's
    284     // name to the substring of line before the first ":" character and
    285     // metadata's value to the substring after this character.
    286     size_t colonPosition = line.find(':');
    287     if (colonPosition == kNotFound)
    288         return;
    289 
    290     String headerName = line.substring(0, colonPosition);
    291 
    292     // Steps 12.5 If metadata's name equals "Region":
    293     if (headerName == regionHeaderName) {
    294         String headerValue = line.substring(colonPosition + 1);
    295         // Steps 12.5.1 - 12.5.11 Region creation: Let region be a new text track region [...]
    296         createNewRegion(headerValue);
    297     }
    298 }
    299 
    300 VTTParser::ParseState VTTParser::collectCueId(const String& line)
    301 {
    302     if (line.contains("-->"))
    303         return collectTimingsAndSettings(line);
    304     m_currentId = line;
    305     return TimingsAndSettings;
    306 }
    307 
    308 VTTParser::ParseState VTTParser::collectTimingsAndSettings(const String& line)
    309 {
    310     // Collect WebVTT cue timings and settings. (5.3 WebVTT cue timings and settings parsing.)
    311     // Steps 1 - 3 - Let input be the string being parsed and position be a pointer into input.
    312     unsigned position = 0;
    313     skipWhiteSpace(line, &position);
    314 
    315     // Steps 4 - 5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time.
    316     if (!collectTimeStamp(line, &position, m_currentStartTime))
    317         return BadCue;
    318     if (position >= line.length())
    319         return BadCue;
    320 
    321     skipWhiteSpace(line, &position);
    322 
    323     // Steps 6 - 9 - If the next three characters are not "-->", abort and return failure.
    324     if (line.find("-->", position) == kNotFound)
    325         return BadCue;
    326     position += 3;
    327     if (position >= line.length())
    328         return BadCue;
    329 
    330     skipWhiteSpace(line, &position);
    331 
    332     // Steps 10 - 11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time.
    333     if (!collectTimeStamp(line, &position, m_currentEndTime))
    334         return BadCue;
    335     skipWhiteSpace(line, &position);
    336 
    337     // Step 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue).
    338     m_currentSettings = line.substring(position, line.length()-1);
    339     return CueText;
    340 }
    341 
    342 VTTParser::ParseState VTTParser::collectCueText(const String& line)
    343 {
    344     // Step 34.
    345     if (line.isEmpty()) {
    346         createNewCue();
    347         return Id;
    348     }
    349     // Step 35.
    350     if (line.contains("-->")) {
    351         // Step 39-40.
    352         createNewCue();
    353 
    354         // Step 41 - New iteration of the cue loop.
    355         return recoverCue(line);
    356     }
    357     if (!m_currentContent.isEmpty())
    358         m_currentContent.append("\n");
    359     m_currentContent.append(line);
    360 
    361     return CueText;
    362 }
    363 
    364 VTTParser::ParseState VTTParser::recoverCue(const String& line)
    365 {
    366     // Step 17 and 21.
    367     resetCueValues();
    368 
    369     // Step 22.
    370     return collectTimingsAndSettings(line);
    371 }
    372 
    373 VTTParser::ParseState VTTParser::ignoreBadCue(const String& line)
    374 {
    375     if (line.isEmpty())
    376         return Id;
    377     if (line.contains("-->"))
    378         return recoverCue(line);
    379     return BadCue;
    380 }
    381 
    382 // A helper class for the construction of a "cue fragment" from the cue text.
    383 class VTTTreeBuilder {
    384 public:
    385     VTTTreeBuilder(Document& document)
    386         : m_document(document) { }
    387 
    388     PassRefPtr<DocumentFragment> buildFromString(const String& cueText);
    389 
    390 private:
    391     void constructTreeFromToken(Document&);
    392 
    393     VTTToken m_token;
    394     RefPtr<ContainerNode> m_currentNode;
    395     Vector<AtomicString> m_languageStack;
    396     Document& m_document;
    397 };
    398 
    399 PassRefPtr<DocumentFragment> VTTTreeBuilder::buildFromString(const String& cueText)
    400 {
    401     // Cue text processing based on
    402     // 5.4 WebVTT cue text parsing rules, and
    403     // 5.5 WebVTT cue text DOM construction rules
    404 
    405     RefPtr<DocumentFragment> fragment = DocumentFragment::create(m_document);
    406 
    407     if (cueText.isEmpty()) {
    408         fragment->parserAppendChild(Text::create(m_document, ""));
    409         return fragment;
    410     }
    411 
    412     m_currentNode = fragment;
    413 
    414     VTTTokenizer tokenizer(cueText);
    415     m_languageStack.clear();
    416 
    417     while (tokenizer.nextToken(m_token))
    418         constructTreeFromToken(m_document);
    419 
    420     return fragment.release();
    421 }
    422 
    423 PassRefPtr<DocumentFragment> VTTParser::createDocumentFragmentFromCueText(Document& document, const String& cueText)
    424 {
    425     VTTTreeBuilder treeBuilder(document);
    426     return treeBuilder.buildFromString(cueText);
    427 }
    428 
    429 void VTTParser::createNewCue()
    430 {
    431     RefPtr<VTTCue> cue = VTTCue::create(*m_document, m_currentStartTime, m_currentEndTime, m_currentContent.toString());
    432     cue->setId(m_currentId);
    433     cue->parseSettings(m_currentSettings);
    434 
    435     m_cuelist.append(cue);
    436     if (m_client)
    437         m_client->newCuesParsed();
    438 }
    439 
    440 void VTTParser::resetCueValues()
    441 {
    442     m_currentId = emptyString();
    443     m_currentSettings = emptyString();
    444     m_currentStartTime = 0;
    445     m_currentEndTime = 0;
    446     m_currentContent.clear();
    447 }
    448 
    449 void VTTParser::createNewRegion(const String& headerValue)
    450 {
    451     if (headerValue.isEmpty())
    452         return;
    453 
    454     // Steps 12.5.1 - 12.5.9 - Construct and initialize a WebVTT Region object.
    455     RefPtr<VTTRegion> region = VTTRegion::create();
    456     region->setRegionSettings(headerValue);
    457 
    458     // Step 12.5.10 If the text track list of regions regions contains a region
    459     // with the same region identifier value as region, remove that region.
    460     for (size_t i = 0; i < m_regionList.size(); ++i) {
    461         if (m_regionList[i]->id() == region->id()) {
    462             m_regionList.remove(i);
    463             break;
    464         }
    465     }
    466 
    467     // Step 12.5.11
    468     m_regionList.append(region);
    469 }
    470 
    471 bool VTTParser::collectTimeStamp(const String& line, unsigned* position, double& timeStamp)
    472 {
    473     // Collect a WebVTT timestamp (5.3 WebVTT cue timings and settings parsing.)
    474     // Steps 1 - 4 - Initial checks, let most significant units be minutes.
    475     enum Mode { Minutes, Hours };
    476     Mode mode = Minutes;
    477 
    478     // Steps 5 - 7 - Collect a sequence of characters that are 0-9.
    479     // If not 2 characters or value is greater than 59, interpret as hours.
    480     int value1;
    481     unsigned value1Digits = collectDigitsToInt(line, position, value1);
    482     if (!value1Digits)
    483         return false;
    484     if (value1Digits != 2 || value1 > 59)
    485         mode = Hours;
    486 
    487     // Steps 8 - 11 - Collect the next sequence of 0-9 after ':' (must be 2 chars).
    488     if (*position >= line.length() || line[(*position)++] != ':')
    489         return false;
    490     int value2;
    491     if (collectDigitsToInt(line, position, value2) != 2)
    492         return false;
    493 
    494     // Step 12 - Detect whether this timestamp includes hours.
    495     int value3;
    496     if (mode == Hours || (*position < line.length() && line[*position] == ':')) {
    497         if (*position >= line.length() || line[(*position)++] != ':')
    498             return false;
    499         if (collectDigitsToInt(line, position, value3) != 2)
    500             return false;
    501     } else {
    502         value3 = value2;
    503         value2 = value1;
    504         value1 = 0;
    505     }
    506 
    507     // Steps 13 - 17 - Collect next sequence of 0-9 after '.' (must be 3 chars).
    508     if (*position >= line.length() || line[(*position)++] != '.')
    509         return false;
    510     int value4;
    511     if (collectDigitsToInt(line, position, value4) != 3)
    512         return false;
    513     if (value2 > 59 || value3 > 59)
    514         return false;
    515 
    516     // Steps 18 - 19 - Calculate result.
    517     timeStamp = (value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond);
    518     return true;
    519 }
    520 
    521 static VTTNodeType tokenToNodeType(VTTToken& token)
    522 {
    523     switch (token.name().length()) {
    524     case 1:
    525         if (token.name()[0] == 'c')
    526             return VTTNodeTypeClass;
    527         if (token.name()[0] == 'v')
    528             return VTTNodeTypeVoice;
    529         if (token.name()[0] == 'b')
    530             return VTTNodeTypeBold;
    531         if (token.name()[0] == 'i')
    532             return VTTNodeTypeItalic;
    533         if (token.name()[0] == 'u')
    534             return VTTNodeTypeUnderline;
    535         break;
    536     case 2:
    537         if (token.name()[0] == 'r' && token.name()[1] == 't')
    538             return VTTNodeTypeRubyText;
    539         break;
    540     case 4:
    541         if (token.name()[0] == 'r' && token.name()[1] == 'u' && token.name()[2] == 'b' && token.name()[3] == 'y')
    542             return VTTNodeTypeRuby;
    543         if (token.name()[0] == 'l' && token.name()[1] == 'a' && token.name()[2] == 'n' && token.name()[3] == 'g')
    544             return VTTNodeTypeLanguage;
    545         break;
    546     }
    547     return VTTNodeTypeNone;
    548 }
    549 
    550 void VTTTreeBuilder::constructTreeFromToken(Document& document)
    551 {
    552     // http://dev.w3.org/html5/webvtt/#webvtt-cue-text-dom-construction-rules
    553 
    554     switch (m_token.type()) {
    555     case VTTTokenTypes::Character: {
    556         RefPtr<Text> child = Text::create(document, m_token.characters());
    557         m_currentNode->parserAppendChild(child);
    558         break;
    559     }
    560     case VTTTokenTypes::StartTag: {
    561         VTTNodeType nodeType = tokenToNodeType(m_token);
    562         if (nodeType == VTTNodeTypeNone)
    563             break;
    564 
    565         VTTNodeType currentType = m_currentNode->isVTTElement() ? toVTTElement(m_currentNode.get())->webVTTNodeType() : VTTNodeTypeNone;
    566         // <rt> is only allowed if the current node is <ruby>.
    567         if (nodeType == VTTNodeTypeRubyText && currentType != VTTNodeTypeRuby)
    568             break;
    569 
    570         RefPtr<VTTElement> child = VTTElement::create(nodeType, &document);
    571         if (!m_token.classes().isEmpty())
    572             child->setAttribute(classAttr, m_token.classes());
    573 
    574         if (nodeType == VTTNodeTypeVoice) {
    575             child->setAttribute(VTTElement::voiceAttributeName(), m_token.annotation());
    576         } else if (nodeType == VTTNodeTypeLanguage) {
    577             m_languageStack.append(m_token.annotation());
    578             child->setAttribute(VTTElement::langAttributeName(), m_languageStack.last());
    579         }
    580         if (!m_languageStack.isEmpty())
    581             child->setLanguage(m_languageStack.last());
    582         m_currentNode->parserAppendChild(child);
    583         m_currentNode = child;
    584         break;
    585     }
    586     case VTTTokenTypes::EndTag: {
    587         VTTNodeType nodeType = tokenToNodeType(m_token);
    588         if (nodeType == VTTNodeTypeNone)
    589             break;
    590 
    591         // The only non-VTTElement would be the DocumentFragment root. (Text
    592         // nodes and PIs will never appear as m_currentNode.)
    593         if (!m_currentNode->isVTTElement())
    594             break;
    595 
    596         VTTNodeType currentType = toVTTElement(m_currentNode.get())->webVTTNodeType();
    597         bool matchesCurrent = nodeType == currentType;
    598         if (!matchesCurrent) {
    599             // </ruby> auto-closes <rt>.
    600             if (currentType == VTTNodeTypeRubyText && nodeType == VTTNodeTypeRuby) {
    601                 if (m_currentNode->parentNode())
    602                     m_currentNode = m_currentNode->parentNode();
    603             } else {
    604                 break;
    605             }
    606         }
    607         if (nodeType == VTTNodeTypeLanguage)
    608             m_languageStack.removeLast();
    609         if (m_currentNode->parentNode())
    610             m_currentNode = m_currentNode->parentNode();
    611         break;
    612     }
    613     case VTTTokenTypes::TimestampTag: {
    614         unsigned position = 0;
    615         String charactersString = m_token.characters();
    616         double parsedTimeStamp;
    617         if (VTTParser::collectTimeStamp(charactersString, &position, parsedTimeStamp))
    618             m_currentNode->parserAppendChild(ProcessingInstruction::create(document, "timestamp", charactersString));
    619         break;
    620     }
    621     default:
    622         break;
    623     }
    624 }
    625 
    626 }
    627 
    628