1 /* 2 * Copyright (C) 2011 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "core/html/track/vtt/VTTParser.h" 33 34 #include "core/dom/Document.h" 35 #include "core/dom/ProcessingInstruction.h" 36 #include "core/dom/Text.h" 37 #include "core/html/track/vtt/VTTElement.h" 38 #include "platform/text/SegmentedString.h" 39 #include "wtf/text/WTFString.h" 40 41 namespace WebCore { 42 43 const double secondsPerHour = 3600; 44 const double secondsPerMinute = 60; 45 const double secondsPerMillisecond = 0.001; 46 const unsigned fileIdentifierLength = 6; 47 48 static unsigned scanDigits(const String& input, unsigned* position) 49 { 50 unsigned startPosition = *position; 51 while (*position < input.length() && isASCIIDigit(input[*position])) 52 (*position)++; 53 return *position - startPosition; 54 } 55 56 unsigned VTTParser::collectDigitsToInt(const String& input, unsigned* position, int& number) 57 { 58 unsigned startPosition = *position; 59 unsigned numDigits = scanDigits(input, position); 60 if (!numDigits) { 61 number = 0; 62 return 0; 63 } 64 bool validNumber; 65 if (input.is8Bit()) 66 number = charactersToInt(input.characters8() + startPosition, numDigits, &validNumber); 67 else 68 number = charactersToInt(input.characters16() + startPosition, numDigits, &validNumber); 69 70 // Since we know that scanDigits only scanned valid (ASCII) digits (and 71 // hence that's what got passed to charactersToInt()), the remaining 72 // failure mode for charactersToInt() is overflow, so if |validNumber| is 73 // not true, then set |number| to the maximum int value. 74 if (!validNumber) 75 number = std::numeric_limits<int>::max(); 76 return numDigits; 77 } 78 79 String VTTParser::collectWord(const String& input, unsigned* position) 80 { 81 StringBuilder string; 82 while (*position < input.length() && !isASpace(input[*position])) 83 string.append(input[(*position)++]); 84 return string.toString(); 85 } 86 87 void VTTParser::skipWhiteSpace(const String& line, unsigned* position) 88 { 89 while (*position < line.length() && isASpace(line[*position])) 90 (*position)++; 91 } 92 93 bool VTTParser::parseFloatPercentageValue(const String& value, float& percentage) 94 { 95 // '%' must be present and at the end of the setting value. 96 if (value.isEmpty() || value[value.length() - 1] != '%') 97 return false; 98 99 unsigned position = 0; 100 unsigned digitsBeforeDot = scanDigits(value, &position); 101 unsigned digitsAfterDot = 0; 102 if (value[position] == '.') { 103 position++; 104 105 digitsAfterDot = scanDigits(value, &position); 106 } 107 108 // At least one digit required. 109 if (!digitsBeforeDot && !digitsAfterDot) 110 return false; 111 112 float number = value.toFloat(); 113 if (number < 0 || number > 100) 114 return false; 115 116 percentage = number; 117 return true; 118 } 119 120 bool VTTParser::parseFloatPercentageValuePair(const String& value, char delimiter, FloatPoint& valuePair) 121 { 122 // The delimiter can't be the first or second value because a pair of 123 // percentages (x%,y%) implies that at least the first two characters 124 // are the first percentage value. 125 size_t delimiterOffset = value.find(delimiter, 2); 126 if (delimiterOffset == kNotFound || delimiterOffset == value.length() - 1) 127 return false; 128 129 float firstCoord; 130 if (!parseFloatPercentageValue(value.substring(0, delimiterOffset), firstCoord)) 131 return false; 132 133 float secondCoord; 134 if (!parseFloatPercentageValue(value.substring(delimiterOffset + 1, value.length() - 1), secondCoord)) 135 return false; 136 137 valuePair = FloatPoint(firstCoord, secondCoord); 138 return true; 139 } 140 141 VTTParser::VTTParser(VTTParserClient* client, Document& document) 142 : m_document(&document) 143 , m_state(Initial) 144 , m_decoder(TextResourceDecoder::create("text/plain", UTF8Encoding())) 145 , m_currentStartTime(0) 146 , m_currentEndTime(0) 147 , m_client(client) 148 { 149 } 150 151 void VTTParser::getNewCues(Vector<RefPtr<VTTCue> >& outputCues) 152 { 153 outputCues = m_cuelist; 154 m_cuelist.clear(); 155 } 156 157 void VTTParser::getNewRegions(Vector<RefPtr<VTTRegion> >& outputRegions) 158 { 159 outputRegions = m_regionList; 160 m_regionList.clear(); 161 } 162 163 void VTTParser::parseBytes(const char* data, unsigned length) 164 { 165 String textData = m_decoder->decode(data, length); 166 m_lineReader.append(textData); 167 parse(); 168 } 169 170 void VTTParser::flush() 171 { 172 String textData = m_decoder->flush(); 173 m_lineReader.append(textData); 174 m_lineReader.setEndOfStream(); 175 parse(); 176 flushPendingCue(); 177 } 178 179 void VTTParser::parse() 180 { 181 // WebVTT parser algorithm. (5.1 WebVTT file parsing.) 182 // Steps 1 - 3 - Initial setup. 183 184 String line; 185 while (m_lineReader.getLine(line)) { 186 switch (m_state) { 187 case Initial: 188 // Steps 4 - 9 - Check for a valid WebVTT signature. 189 if (!hasRequiredFileIdentifier(line)) { 190 if (m_client) 191 m_client->fileFailedToParse(); 192 return; 193 } 194 195 m_state = Header; 196 break; 197 198 case Header: 199 // Steps 10 - 14 - Allow a header (comment area) under the WEBVTT line. 200 collectMetadataHeader(line); 201 202 if (line.isEmpty()) { 203 if (m_client && m_regionList.size()) 204 m_client->newRegionsParsed(); 205 206 m_state = Id; 207 break; 208 } 209 210 // Step 15 - Break out of header loop if the line could be a timestamp line. 211 if (line.contains("-->")) 212 m_state = recoverCue(line); 213 214 // Step 16 - Line is not the empty string and does not contain "-->". 215 break; 216 217 case Id: 218 // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values. 219 if (line.isEmpty()) 220 break; 221 222 // Step 21 - Cue creation (start a new cue). 223 resetCueValues(); 224 225 // Steps 22 - 25 - Check if this line contains an optional identifier or timing data. 226 m_state = collectCueId(line); 227 break; 228 229 case TimingsAndSettings: 230 // Steps 26 - 27 - Discard current cue if the line is empty. 231 if (line.isEmpty()) { 232 m_state = Id; 233 break; 234 } 235 236 // Steps 28 - 29 - Collect cue timings and settings. 237 m_state = collectTimingsAndSettings(line); 238 break; 239 240 case CueText: 241 // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output. 242 m_state = collectCueText(line); 243 break; 244 245 case BadCue: 246 // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen. 247 m_state = ignoreBadCue(line); 248 break; 249 } 250 } 251 } 252 253 void VTTParser::flushPendingCue() 254 { 255 ASSERT(m_lineReader.isAtEndOfStream()); 256 // If we're in the CueText state when we run out of data, we emit the pending cue. 257 if (m_state == CueText) 258 createNewCue(); 259 } 260 261 bool VTTParser::hasRequiredFileIdentifier(const String& line) 262 { 263 // A WebVTT file identifier consists of an optional BOM character, 264 // the string "WEBVTT" followed by an optional space or tab character, 265 // and any number of characters that are not line terminators ... 266 if (!line.startsWith("WEBVTT", fileIdentifierLength)) 267 return false; 268 if (line.length() > fileIdentifierLength && !isASpace(line[fileIdentifierLength])) 269 return false; 270 271 return true; 272 } 273 274 void VTTParser::collectMetadataHeader(const String& line) 275 { 276 // WebVTT header parsing (WebVTT parser algorithm step 12) 277 DEFINE_STATIC_LOCAL(const AtomicString, regionHeaderName, ("Region", AtomicString::ConstructFromLiteral)); 278 279 // The only currently supported header is the "Region" header. 280 if (!RuntimeEnabledFeatures::webVTTRegionsEnabled()) 281 return; 282 283 // Step 12.4 If line contains the character ":" (A U+003A COLON), then set metadata's 284 // name to the substring of line before the first ":" character and 285 // metadata's value to the substring after this character. 286 size_t colonPosition = line.find(':'); 287 if (colonPosition == kNotFound) 288 return; 289 290 String headerName = line.substring(0, colonPosition); 291 292 // Steps 12.5 If metadata's name equals "Region": 293 if (headerName == regionHeaderName) { 294 String headerValue = line.substring(colonPosition + 1); 295 // Steps 12.5.1 - 12.5.11 Region creation: Let region be a new text track region [...] 296 createNewRegion(headerValue); 297 } 298 } 299 300 VTTParser::ParseState VTTParser::collectCueId(const String& line) 301 { 302 if (line.contains("-->")) 303 return collectTimingsAndSettings(line); 304 m_currentId = line; 305 return TimingsAndSettings; 306 } 307 308 VTTParser::ParseState VTTParser::collectTimingsAndSettings(const String& line) 309 { 310 // Collect WebVTT cue timings and settings. (5.3 WebVTT cue timings and settings parsing.) 311 // Steps 1 - 3 - Let input be the string being parsed and position be a pointer into input. 312 unsigned position = 0; 313 skipWhiteSpace(line, &position); 314 315 // Steps 4 - 5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time. 316 if (!collectTimeStamp(line, &position, m_currentStartTime)) 317 return BadCue; 318 if (position >= line.length()) 319 return BadCue; 320 321 skipWhiteSpace(line, &position); 322 323 // Steps 6 - 9 - If the next three characters are not "-->", abort and return failure. 324 if (line.find("-->", position) == kNotFound) 325 return BadCue; 326 position += 3; 327 if (position >= line.length()) 328 return BadCue; 329 330 skipWhiteSpace(line, &position); 331 332 // Steps 10 - 11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time. 333 if (!collectTimeStamp(line, &position, m_currentEndTime)) 334 return BadCue; 335 skipWhiteSpace(line, &position); 336 337 // Step 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue). 338 m_currentSettings = line.substring(position, line.length()-1); 339 return CueText; 340 } 341 342 VTTParser::ParseState VTTParser::collectCueText(const String& line) 343 { 344 // Step 34. 345 if (line.isEmpty()) { 346 createNewCue(); 347 return Id; 348 } 349 // Step 35. 350 if (line.contains("-->")) { 351 // Step 39-40. 352 createNewCue(); 353 354 // Step 41 - New iteration of the cue loop. 355 return recoverCue(line); 356 } 357 if (!m_currentContent.isEmpty()) 358 m_currentContent.append("\n"); 359 m_currentContent.append(line); 360 361 return CueText; 362 } 363 364 VTTParser::ParseState VTTParser::recoverCue(const String& line) 365 { 366 // Step 17 and 21. 367 resetCueValues(); 368 369 // Step 22. 370 return collectTimingsAndSettings(line); 371 } 372 373 VTTParser::ParseState VTTParser::ignoreBadCue(const String& line) 374 { 375 if (line.isEmpty()) 376 return Id; 377 if (line.contains("-->")) 378 return recoverCue(line); 379 return BadCue; 380 } 381 382 // A helper class for the construction of a "cue fragment" from the cue text. 383 class VTTTreeBuilder { 384 public: 385 VTTTreeBuilder(Document& document) 386 : m_document(document) { } 387 388 PassRefPtr<DocumentFragment> buildFromString(const String& cueText); 389 390 private: 391 void constructTreeFromToken(Document&); 392 393 VTTToken m_token; 394 RefPtr<ContainerNode> m_currentNode; 395 Vector<AtomicString> m_languageStack; 396 Document& m_document; 397 }; 398 399 PassRefPtr<DocumentFragment> VTTTreeBuilder::buildFromString(const String& cueText) 400 { 401 // Cue text processing based on 402 // 5.4 WebVTT cue text parsing rules, and 403 // 5.5 WebVTT cue text DOM construction rules 404 405 RefPtr<DocumentFragment> fragment = DocumentFragment::create(m_document); 406 407 if (cueText.isEmpty()) { 408 fragment->parserAppendChild(Text::create(m_document, "")); 409 return fragment; 410 } 411 412 m_currentNode = fragment; 413 414 VTTTokenizer tokenizer(cueText); 415 m_languageStack.clear(); 416 417 while (tokenizer.nextToken(m_token)) 418 constructTreeFromToken(m_document); 419 420 return fragment.release(); 421 } 422 423 PassRefPtr<DocumentFragment> VTTParser::createDocumentFragmentFromCueText(Document& document, const String& cueText) 424 { 425 VTTTreeBuilder treeBuilder(document); 426 return treeBuilder.buildFromString(cueText); 427 } 428 429 void VTTParser::createNewCue() 430 { 431 RefPtr<VTTCue> cue = VTTCue::create(*m_document, m_currentStartTime, m_currentEndTime, m_currentContent.toString()); 432 cue->setId(m_currentId); 433 cue->parseSettings(m_currentSettings); 434 435 m_cuelist.append(cue); 436 if (m_client) 437 m_client->newCuesParsed(); 438 } 439 440 void VTTParser::resetCueValues() 441 { 442 m_currentId = emptyString(); 443 m_currentSettings = emptyString(); 444 m_currentStartTime = 0; 445 m_currentEndTime = 0; 446 m_currentContent.clear(); 447 } 448 449 void VTTParser::createNewRegion(const String& headerValue) 450 { 451 if (headerValue.isEmpty()) 452 return; 453 454 // Steps 12.5.1 - 12.5.9 - Construct and initialize a WebVTT Region object. 455 RefPtr<VTTRegion> region = VTTRegion::create(); 456 region->setRegionSettings(headerValue); 457 458 // Step 12.5.10 If the text track list of regions regions contains a region 459 // with the same region identifier value as region, remove that region. 460 for (size_t i = 0; i < m_regionList.size(); ++i) { 461 if (m_regionList[i]->id() == region->id()) { 462 m_regionList.remove(i); 463 break; 464 } 465 } 466 467 // Step 12.5.11 468 m_regionList.append(region); 469 } 470 471 bool VTTParser::collectTimeStamp(const String& line, unsigned* position, double& timeStamp) 472 { 473 // Collect a WebVTT timestamp (5.3 WebVTT cue timings and settings parsing.) 474 // Steps 1 - 4 - Initial checks, let most significant units be minutes. 475 enum Mode { Minutes, Hours }; 476 Mode mode = Minutes; 477 478 // Steps 5 - 7 - Collect a sequence of characters that are 0-9. 479 // If not 2 characters or value is greater than 59, interpret as hours. 480 int value1; 481 unsigned value1Digits = collectDigitsToInt(line, position, value1); 482 if (!value1Digits) 483 return false; 484 if (value1Digits != 2 || value1 > 59) 485 mode = Hours; 486 487 // Steps 8 - 11 - Collect the next sequence of 0-9 after ':' (must be 2 chars). 488 if (*position >= line.length() || line[(*position)++] != ':') 489 return false; 490 int value2; 491 if (collectDigitsToInt(line, position, value2) != 2) 492 return false; 493 494 // Step 12 - Detect whether this timestamp includes hours. 495 int value3; 496 if (mode == Hours || (*position < line.length() && line[*position] == ':')) { 497 if (*position >= line.length() || line[(*position)++] != ':') 498 return false; 499 if (collectDigitsToInt(line, position, value3) != 2) 500 return false; 501 } else { 502 value3 = value2; 503 value2 = value1; 504 value1 = 0; 505 } 506 507 // Steps 13 - 17 - Collect next sequence of 0-9 after '.' (must be 3 chars). 508 if (*position >= line.length() || line[(*position)++] != '.') 509 return false; 510 int value4; 511 if (collectDigitsToInt(line, position, value4) != 3) 512 return false; 513 if (value2 > 59 || value3 > 59) 514 return false; 515 516 // Steps 18 - 19 - Calculate result. 517 timeStamp = (value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond); 518 return true; 519 } 520 521 static VTTNodeType tokenToNodeType(VTTToken& token) 522 { 523 switch (token.name().length()) { 524 case 1: 525 if (token.name()[0] == 'c') 526 return VTTNodeTypeClass; 527 if (token.name()[0] == 'v') 528 return VTTNodeTypeVoice; 529 if (token.name()[0] == 'b') 530 return VTTNodeTypeBold; 531 if (token.name()[0] == 'i') 532 return VTTNodeTypeItalic; 533 if (token.name()[0] == 'u') 534 return VTTNodeTypeUnderline; 535 break; 536 case 2: 537 if (token.name()[0] == 'r' && token.name()[1] == 't') 538 return VTTNodeTypeRubyText; 539 break; 540 case 4: 541 if (token.name()[0] == 'r' && token.name()[1] == 'u' && token.name()[2] == 'b' && token.name()[3] == 'y') 542 return VTTNodeTypeRuby; 543 if (token.name()[0] == 'l' && token.name()[1] == 'a' && token.name()[2] == 'n' && token.name()[3] == 'g') 544 return VTTNodeTypeLanguage; 545 break; 546 } 547 return VTTNodeTypeNone; 548 } 549 550 void VTTTreeBuilder::constructTreeFromToken(Document& document) 551 { 552 // http://dev.w3.org/html5/webvtt/#webvtt-cue-text-dom-construction-rules 553 554 switch (m_token.type()) { 555 case VTTTokenTypes::Character: { 556 RefPtr<Text> child = Text::create(document, m_token.characters()); 557 m_currentNode->parserAppendChild(child); 558 break; 559 } 560 case VTTTokenTypes::StartTag: { 561 VTTNodeType nodeType = tokenToNodeType(m_token); 562 if (nodeType == VTTNodeTypeNone) 563 break; 564 565 VTTNodeType currentType = m_currentNode->isVTTElement() ? toVTTElement(m_currentNode.get())->webVTTNodeType() : VTTNodeTypeNone; 566 // <rt> is only allowed if the current node is <ruby>. 567 if (nodeType == VTTNodeTypeRubyText && currentType != VTTNodeTypeRuby) 568 break; 569 570 RefPtr<VTTElement> child = VTTElement::create(nodeType, &document); 571 if (!m_token.classes().isEmpty()) 572 child->setAttribute(classAttr, m_token.classes()); 573 574 if (nodeType == VTTNodeTypeVoice) { 575 child->setAttribute(VTTElement::voiceAttributeName(), m_token.annotation()); 576 } else if (nodeType == VTTNodeTypeLanguage) { 577 m_languageStack.append(m_token.annotation()); 578 child->setAttribute(VTTElement::langAttributeName(), m_languageStack.last()); 579 } 580 if (!m_languageStack.isEmpty()) 581 child->setLanguage(m_languageStack.last()); 582 m_currentNode->parserAppendChild(child); 583 m_currentNode = child; 584 break; 585 } 586 case VTTTokenTypes::EndTag: { 587 VTTNodeType nodeType = tokenToNodeType(m_token); 588 if (nodeType == VTTNodeTypeNone) 589 break; 590 591 // The only non-VTTElement would be the DocumentFragment root. (Text 592 // nodes and PIs will never appear as m_currentNode.) 593 if (!m_currentNode->isVTTElement()) 594 break; 595 596 VTTNodeType currentType = toVTTElement(m_currentNode.get())->webVTTNodeType(); 597 bool matchesCurrent = nodeType == currentType; 598 if (!matchesCurrent) { 599 // </ruby> auto-closes <rt>. 600 if (currentType == VTTNodeTypeRubyText && nodeType == VTTNodeTypeRuby) { 601 if (m_currentNode->parentNode()) 602 m_currentNode = m_currentNode->parentNode(); 603 } else { 604 break; 605 } 606 } 607 if (nodeType == VTTNodeTypeLanguage) 608 m_languageStack.removeLast(); 609 if (m_currentNode->parentNode()) 610 m_currentNode = m_currentNode->parentNode(); 611 break; 612 } 613 case VTTTokenTypes::TimestampTag: { 614 unsigned position = 0; 615 String charactersString = m_token.characters(); 616 double parsedTimeStamp; 617 if (VTTParser::collectTimeStamp(charactersString, &position, parsedTimeStamp)) 618 m_currentNode->parserAppendChild(ProcessingInstruction::create(document, "timestamp", charactersString)); 619 break; 620 } 621 default: 622 break; 623 } 624 } 625 626 } 627 628