Home | History | Annotate | Download | only in src
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_DATEPARSER_INL_H_
      6 #define V8_DATEPARSER_INL_H_
      7 
      8 #include "src/dateparser.h"
      9 
     10 namespace v8 {
     11 namespace internal {
     12 
     13 template <typename Char>
     14 bool DateParser::Parse(Vector<Char> str,
     15                        FixedArray* out,
     16                        UnicodeCache* unicode_cache) {
     17   ASSERT(out->length() >= OUTPUT_SIZE);
     18   InputReader<Char> in(unicode_cache, str);
     19   DateStringTokenizer<Char> scanner(&in);
     20   TimeZoneComposer tz;
     21   TimeComposer time;
     22   DayComposer day;
     23 
     24   // Specification:
     25   // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible
     26   // with Safari.
     27   // ES5 ISO 8601 dates:
     28   //   [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]]
     29   //   where yyyy is in the range 0000..9999 and
     30   //         +/-yyyyyy is in the range -999999..+999999 -
     31   //           but -000000 is invalid (year zero must be positive),
     32   //         MM is in the range 01..12,
     33   //         DD is in the range 01..31,
     34   //         MM and DD defaults to 01 if missing,,
     35   //         HH is generally in the range 00..23, but can be 24 if mm, ss
     36   //           and sss are zero (or missing), representing midnight at the
     37   //           end of a day,
     38   //         mm and ss are in the range 00..59,
     39   //         sss is in the range 000..999,
     40   //         hh is in the range 00..23,
     41   //         mm, ss, and sss default to 00 if missing, and
     42   //         timezone defaults to Z if missing
     43   //           (following Safari, ISO actually demands local time).
     44   //  Extensions:
     45   //   We also allow sss to have more or less than three digits (but at
     46   //   least one).
     47   //   We allow hh:mm to be specified as hhmm.
     48   // Legacy dates:
     49   //  Any unrecognized word before the first number is ignored.
     50   //  Parenthesized text is ignored.
     51   //  An unsigned number followed by ':' is a time value, and is
     52   //  added to the TimeComposer. A number followed by '::' adds a second
     53   //  zero as well. A number followed by '.' is also a time and must be
     54   //  followed by milliseconds.
     55   //  Any other number is a date component and is added to DayComposer.
     56   //  A month name (or really: any word having the same first three letters
     57   //  as a month name) is recorded as a named month in the Day composer.
     58   //  A word recognizable as a time-zone is recorded as such, as is
     59   //  '(+|-)(hhmm|hh:)'.
     60   //  Legacy dates don't allow extra signs ('+' or '-') or umatched ')'
     61   //  after a number has been read (before the first number, any garbage
     62   //  is allowed).
     63   // Intersection of the two:
     64   //  A string that matches both formats (e.g. 1970-01-01) will be
     65   //  parsed as an ES5 date-time string - which means it will default
     66   //  to UTC time-zone. That's unavoidable if following the ES5
     67   //  specification.
     68   //  After a valid "T" has been read while scanning an ES5 datetime string,
     69   //  the input can no longer be a valid legacy date, since the "T" is a
     70   //  garbage string after a number has been read.
     71 
     72   // First try getting as far as possible with as ES5 Date Time String.
     73   DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz);
     74   if (next_unhandled_token.IsInvalid()) return false;
     75   bool has_read_number = !day.IsEmpty();
     76   // If there's anything left, continue with the legacy parser.
     77   for (DateToken token = next_unhandled_token;
     78        !token.IsEndOfInput();
     79        token = scanner.Next()) {
     80     if (token.IsNumber()) {
     81       has_read_number = true;
     82       int n = token.number();
     83       if (scanner.SkipSymbol(':')) {
     84         if (scanner.SkipSymbol(':')) {
     85           // n + "::"
     86           if (!time.IsEmpty()) return false;
     87           time.Add(n);
     88           time.Add(0);
     89         } else {
     90           // n + ":"
     91           if (!time.Add(n)) return false;
     92           if (scanner.Peek().IsSymbol('.')) scanner.Next();
     93         }
     94       } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) {
     95         time.Add(n);
     96         if (!scanner.Peek().IsNumber()) return false;
     97         int n = ReadMilliseconds(scanner.Next());
     98         if (n < 0) return false;
     99         time.AddFinal(n);
    100       } else if (tz.IsExpecting(n)) {
    101         tz.SetAbsoluteMinute(n);
    102       } else if (time.IsExpecting(n)) {
    103         time.AddFinal(n);
    104         // Require end, white space, "Z", "+" or "-" immediately after
    105         // finalizing time.
    106         DateToken peek = scanner.Peek();
    107         if (!peek.IsEndOfInput() &&
    108             !peek.IsWhiteSpace() &&
    109             !peek.IsKeywordZ() &&
    110             !peek.IsAsciiSign()) return false;
    111       } else {
    112         if (!day.Add(n)) return false;
    113         scanner.SkipSymbol('-');
    114       }
    115     } else if (token.IsKeyword()) {
    116       // Parse a "word" (sequence of chars. >= 'A').
    117       KeywordType type = token.keyword_type();
    118       int value = token.keyword_value();
    119       if (type == AM_PM && !time.IsEmpty()) {
    120         time.SetHourOffset(value);
    121       } else if (type == MONTH_NAME) {
    122         day.SetNamedMonth(value);
    123         scanner.SkipSymbol('-');
    124       } else if (type == TIME_ZONE_NAME && has_read_number) {
    125         tz.Set(value);
    126       } else {
    127         // Garbage words are illegal if a number has been read.
    128         if (has_read_number) return false;
    129         // The first number has to be separated from garbage words by
    130         // whitespace or other separators.
    131         if (scanner.Peek().IsNumber()) return false;
    132       }
    133     } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
    134       // Parse UTC offset (only after UTC or time).
    135       tz.SetSign(token.ascii_sign());
    136       // The following number may be empty.
    137       int n = 0;
    138       if (scanner.Peek().IsNumber()) {
    139         n = scanner.Next().number();
    140       }
    141       has_read_number = true;
    142 
    143       if (scanner.Peek().IsSymbol(':')) {
    144         tz.SetAbsoluteHour(n);
    145         tz.SetAbsoluteMinute(kNone);
    146       } else {
    147         tz.SetAbsoluteHour(n / 100);
    148         tz.SetAbsoluteMinute(n % 100);
    149       }
    150     } else if ((token.IsAsciiSign() || token.IsSymbol(')')) &&
    151                has_read_number) {
    152       // Extra sign or ')' is illegal if a number has been read.
    153       return false;
    154     } else {
    155       // Ignore other characters and whitespace.
    156     }
    157   }
    158 
    159   return day.Write(out) && time.Write(out) && tz.Write(out);
    160 }
    161 
    162 
    163 template<typename CharType>
    164 DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() {
    165   int pre_pos = in_->position();
    166   if (in_->IsEnd()) return DateToken::EndOfInput();
    167   if (in_->IsAsciiDigit()) {
    168     int n = in_->ReadUnsignedNumeral();
    169     int length = in_->position() - pre_pos;
    170     return DateToken::Number(n, length);
    171   }
    172   if (in_->Skip(':')) return DateToken::Symbol(':');
    173   if (in_->Skip('-')) return DateToken::Symbol('-');
    174   if (in_->Skip('+')) return DateToken::Symbol('+');
    175   if (in_->Skip('.')) return DateToken::Symbol('.');
    176   if (in_->Skip(')')) return DateToken::Symbol(')');
    177   if (in_->IsAsciiAlphaOrAbove()) {
    178     ASSERT(KeywordTable::kPrefixLength == 3);
    179     uint32_t buffer[3] = {0, 0, 0};
    180     int length = in_->ReadWord(buffer, 3);
    181     int index = KeywordTable::Lookup(buffer, length);
    182     return DateToken::Keyword(KeywordTable::GetType(index),
    183                               KeywordTable::GetValue(index),
    184                               length);
    185   }
    186   if (in_->SkipWhiteSpace()) {
    187     return DateToken::WhiteSpace(in_->position() - pre_pos);
    188   }
    189   if (in_->SkipParentheses()) {
    190     return DateToken::Unknown();
    191   }
    192   in_->Next();
    193   return DateToken::Unknown();
    194 }
    195 
    196 
    197 template <typename Char>
    198 DateParser::DateToken DateParser::ParseES5DateTime(
    199     DateStringTokenizer<Char>* scanner,
    200     DayComposer* day,
    201     TimeComposer* time,
    202     TimeZoneComposer* tz) {
    203   ASSERT(day->IsEmpty());
    204   ASSERT(time->IsEmpty());
    205   ASSERT(tz->IsEmpty());
    206 
    207   // Parse mandatory date string: [('-'|'+')yy]yyyy[':'MM[':'DD]]
    208   if (scanner->Peek().IsAsciiSign()) {
    209     // Keep the sign token, so we can pass it back to the legacy
    210     // parser if we don't use it.
    211     DateToken sign_token = scanner->Next();
    212     if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token;
    213     int sign = sign_token.ascii_sign();
    214     int year = scanner->Next().number();
    215     if (sign < 0 && year == 0) return sign_token;
    216     day->Add(sign * year);
    217   } else if (scanner->Peek().IsFixedLengthNumber(4)) {
    218     day->Add(scanner->Next().number());
    219   } else {
    220     return scanner->Next();
    221   }
    222   if (scanner->SkipSymbol('-')) {
    223     if (!scanner->Peek().IsFixedLengthNumber(2) ||
    224         !DayComposer::IsMonth(scanner->Peek().number())) return scanner->Next();
    225     day->Add(scanner->Next().number());
    226     if (scanner->SkipSymbol('-')) {
    227       if (!scanner->Peek().IsFixedLengthNumber(2) ||
    228           !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next();
    229       day->Add(scanner->Next().number());
    230     }
    231   }
    232   // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z
    233   if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) {
    234     if (!scanner->Peek().IsEndOfInput()) return scanner->Next();
    235   } else {
    236     // ES5 Date Time String time part is present.
    237     scanner->Next();
    238     if (!scanner->Peek().IsFixedLengthNumber(2) ||
    239         !Between(scanner->Peek().number(), 0, 24)) {
    240       return DateToken::Invalid();
    241     }
    242     // Allow 24:00[:00[.000]], but no other time starting with 24.
    243     bool hour_is_24 = (scanner->Peek().number() == 24);
    244     time->Add(scanner->Next().number());
    245     if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
    246     if (!scanner->Peek().IsFixedLengthNumber(2) ||
    247         !TimeComposer::IsMinute(scanner->Peek().number()) ||
    248         (hour_is_24 && scanner->Peek().number() > 0)) {
    249       return DateToken::Invalid();
    250     }
    251     time->Add(scanner->Next().number());
    252     if (scanner->SkipSymbol(':')) {
    253       if (!scanner->Peek().IsFixedLengthNumber(2) ||
    254           !TimeComposer::IsSecond(scanner->Peek().number()) ||
    255           (hour_is_24 && scanner->Peek().number() > 0)) {
    256         return DateToken::Invalid();
    257       }
    258       time->Add(scanner->Next().number());
    259       if (scanner->SkipSymbol('.')) {
    260         if (!scanner->Peek().IsNumber() ||
    261             (hour_is_24 && scanner->Peek().number() > 0)) {
    262           return DateToken::Invalid();
    263         }
    264         // Allow more or less than the mandated three digits.
    265         time->Add(ReadMilliseconds(scanner->Next()));
    266       }
    267     }
    268     // Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm
    269     if (scanner->Peek().IsKeywordZ()) {
    270       scanner->Next();
    271       tz->Set(0);
    272     } else if (scanner->Peek().IsSymbol('+') ||
    273                scanner->Peek().IsSymbol('-')) {
    274       tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1);
    275       if (scanner->Peek().IsFixedLengthNumber(4)) {
    276         // hhmm extension syntax.
    277         int hourmin = scanner->Next().number();
    278         int hour = hourmin / 100;
    279         int min = hourmin % 100;
    280         if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) {
    281           return DateToken::Invalid();
    282         }
    283         tz->SetAbsoluteHour(hour);
    284         tz->SetAbsoluteMinute(min);
    285       } else {
    286         // hh:mm standard syntax.
    287         if (!scanner->Peek().IsFixedLengthNumber(2) ||
    288             !TimeComposer::IsHour(scanner->Peek().number())) {
    289           return DateToken::Invalid();
    290         }
    291         tz->SetAbsoluteHour(scanner->Next().number());
    292         if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
    293         if (!scanner->Peek().IsFixedLengthNumber(2) ||
    294             !TimeComposer::IsMinute(scanner->Peek().number())) {
    295           return DateToken::Invalid();
    296         }
    297         tz->SetAbsoluteMinute(scanner->Next().number());
    298       }
    299     }
    300     if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid();
    301   }
    302   // Successfully parsed ES5 Date Time String. Default to UTC if no TZ given.
    303   if (tz->IsEmpty()) tz->Set(0);
    304   day->set_iso_date();
    305   return DateToken::EndOfInput();
    306 }
    307 
    308 
    309 } }  // namespace v8::internal
    310 
    311 #endif  // V8_DATEPARSER_INL_H_
    312