Home | History | Annotate | Download | only in src
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_DATEPARSER_H_
      6 #define V8_DATEPARSER_H_
      7 
      8 #include "src/allocation.h"
      9 #include "src/char-predicates-inl.h"
     10 
     11 namespace v8 {
     12 namespace internal {
     13 
     14 class DateParser : public AllStatic {
     15  public:
  // Parse the string as a date. If parsing succeeds, return true after
  // filling out the output array as follows (all integers are Smis):
  // [0]: year
  // [1]: month (0 = Jan, 1 = Feb, ...)
  // [2]: day
  // [3]: hour
  // [4]: minute
  // [5]: second
  // [6]: millisecond
  // [7]: UTC offset in seconds, or null value if no timezone specified
  // If parsing fails, return false (content of output array is not defined).
  //
  // The slot numbers above match the YEAR .. UTC_OFFSET enumerators declared
  // in this class, and OUTPUT_SIZE is the number of slots.
  // Char is the character type of the input vector.
  // NOTE(review): 'cache' is presumably handed to InputReader for whitespace
  // classification; the definition is not in this header, so confirm there.
  template <typename Char>
  static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
     29 
     30   enum {
     31     YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
     32   };
     33 
     34  private:
     35   // Range testing
     36   static inline bool Between(int x, int lo, int hi) {
     37     return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
     38   }
     39 
  // Indicates a missing value. The composer classes in this header
  // initialize their optional fields to kNone and compare against it to tell
  // whether a field has been set.
  static const int kNone = kMaxInt;
     42 
  // Maximal number of digits used to build the value of a numeral.
  // Remaining digits are ignored: InputReader::ReadUnsignedNumeral still
  // consumes them, but they no longer contribute to the returned value.
  // Nine decimal digits give at most 999999999.
  static const int kMaxSignificantDigits = 9;
     46 
     47   // InputReader provides basic string parsing and character classification.
     48   template <typename Char>
     49   class InputReader BASE_EMBEDDED {
     50    public:
     51     InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
     52         : index_(0),
     53           buffer_(s),
     54           unicode_cache_(unicode_cache) {
     55       Next();
     56     }
     57 
     58     int position() { return index_; }
     59 
     60     // Advance to the next character of the string.
     61     void Next() {
     62       ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
     63       index_++;
     64     }
     65 
     66     // Read a string of digits as an unsigned number. Cap value at
     67     // kMaxSignificantDigits, but skip remaining digits if the numeral
     68     // is longer.
     69     int ReadUnsignedNumeral() {
     70       int n = 0;
     71       int i = 0;
     72       while (IsAsciiDigit()) {
     73         if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
     74         i++;
     75         Next();
     76       }
     77       return n;
     78     }
     79 
     80     // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
     81     // lower-case prefix, and pad any remainder of the buffer with zeroes.
     82     // Return word length.
     83     int ReadWord(uint32_t* prefix, int prefix_size) {
     84       int len;
     85       for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
     86         if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
     87       }
     88       for (int i = len; i < prefix_size; i++) prefix[i] = 0;
     89       return len;
     90     }
     91 
     92     // The skip methods return whether they actually skipped something.
     93     bool Skip(uint32_t c) {
     94       if (ch_ == c) {
     95         Next();
     96         return true;
     97       }
     98       return false;
     99     }
    100 
    101     bool SkipWhiteSpace() {
    102       if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
    103         Next();
    104         return true;
    105       }
    106       return false;
    107     }
    108 
    109     bool SkipParentheses() {
    110       if (ch_ != '(') return false;
    111       int balance = 0;
    112       do {
    113         if (ch_ == ')') --balance;
    114         else if (ch_ == '(') ++balance;
    115         Next();
    116       } while (balance > 0 && ch_);
    117       return true;
    118     }
    119 
    120     // Character testing/classification. Non-ASCII digits are not supported.
    121     bool Is(uint32_t c) const { return ch_ == c; }
    122     bool IsEnd() const { return ch_ == 0; }
    123     bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
    124     bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
    125     bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
    126 
    127     // Return 1 for '+' and -1 for '-'.
    128     int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
    129 
    130    private:
    131     int index_;
    132     Vector<Char> buffer_;
    133     uint32_t ch_;
    134     UnicodeCache* unicode_cache_;
    135   };
    136 
    137   enum KeywordType {
    138       INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
    139   };
    140 
    141   struct DateToken {
    142    public:
    143     bool IsInvalid() { return tag_ == kInvalidTokenTag; }
    144     bool IsUnknown() { return tag_ == kUnknownTokenTag; }
    145     bool IsNumber() { return tag_ == kNumberTag; }
    146     bool IsSymbol() { return tag_ == kSymbolTag; }
    147     bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
    148     bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
    149     bool IsKeyword() { return tag_ >= kKeywordTagStart; }
    150 
    151     int length() { return length_; }
    152 
    153     int number() {
    154       DCHECK(IsNumber());
    155       return value_;
    156     }
    157     KeywordType keyword_type() {
    158       DCHECK(IsKeyword());
    159       return static_cast<KeywordType>(tag_);
    160     }
    161     int keyword_value() {
    162       DCHECK(IsKeyword());
    163       return value_;
    164     }
    165     char symbol() {
    166       DCHECK(IsSymbol());
    167       return static_cast<char>(value_);
    168     }
    169     bool IsSymbol(char symbol) {
    170       return IsSymbol() && this->symbol() == symbol;
    171     }
    172     bool IsKeywordType(KeywordType tag) {
    173       return tag_ == tag;
    174     }
    175     bool IsFixedLengthNumber(int length) {
    176       return IsNumber() && length_ == length;
    177     }
    178     bool IsAsciiSign() {
    179       return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
    180     }
    181     int ascii_sign() {
    182       DCHECK(IsAsciiSign());
    183       return 44 - value_;
    184     }
    185     bool IsKeywordZ() {
    186       return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
    187     }
    188     bool IsUnknown(int character) {
    189       return IsUnknown() && value_ == character;
    190     }
    191     // Factory functions.
    192     static DateToken Keyword(KeywordType tag, int value, int length) {
    193       return DateToken(tag, length, value);
    194     }
    195     static DateToken Number(int value, int length) {
    196       return DateToken(kNumberTag, length, value);
    197     }
    198     static DateToken Symbol(char symbol) {
    199       return DateToken(kSymbolTag, 1, symbol);
    200     }
    201     static DateToken EndOfInput() {
    202       return DateToken(kEndOfInputTag, 0, -1);
    203     }
    204     static DateToken WhiteSpace(int length) {
    205       return DateToken(kWhiteSpaceTag, length, -1);
    206     }
    207     static DateToken Unknown() {
    208       return DateToken(kUnknownTokenTag, 1, -1);
    209     }
    210     static DateToken Invalid() {
    211       return DateToken(kInvalidTokenTag, 0, -1);
    212     }
    213 
    214    private:
    215     enum TagType {
    216       kInvalidTokenTag = -6,
    217       kUnknownTokenTag = -5,
    218       kWhiteSpaceTag = -4,
    219       kNumberTag = -3,
    220       kSymbolTag = -2,
    221       kEndOfInputTag = -1,
    222       kKeywordTagStart = 0
    223     };
    224     DateToken(int tag, int length, int value)
    225         : tag_(tag),
    226           length_(length),
    227           value_(value) { }
    228 
    229     int tag_;
    230     int length_;  // Number of characters.
    231     int value_;
    232   };
    233 
    234   template <typename Char>
    235   class DateStringTokenizer {
    236    public:
    237     explicit DateStringTokenizer(InputReader<Char>* in)
    238         : in_(in), next_(Scan()) { }
    239     DateToken Next() {
    240       DateToken result = next_;
    241       next_ = Scan();
    242       return result;
    243     }
    244 
    245     DateToken Peek() {
    246       return next_;
    247     }
    248     bool SkipSymbol(char symbol) {
    249       if (next_.IsSymbol(symbol)) {
    250         next_ = Scan();
    251         return true;
    252       }
    253       return false;
    254     }
    255 
    256    private:
    257     DateToken Scan();
    258 
    259     InputReader<Char>* in_;
    260     DateToken next_;
    261   };
    262 
  // Declared here, defined out of line.
  // NOTE(review): presumably turns a numeric token into a millisecond
  // value; confirm against the definition.
  static int ReadMilliseconds(DateToken number);
    264 
    265   // KeywordTable maps names of months, time zones, am/pm to numbers.
    266   class KeywordTable : public AllStatic {
    267    public:
    268     // Look up a word in the keyword table and return an index.
    269     // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
    270     // and 'len' is the word length.
    271     static int Lookup(const uint32_t* pre, int len);
    272     // Get the type of the keyword at index i.
    273     static KeywordType GetType(int i) {
    274       return static_cast<KeywordType>(array[i][kTypeOffset]);
    275     }
    276     // Get the value of the keyword at index i.
    277     static int GetValue(int i) { return array[i][kValueOffset]; }
    278 
    279     static const int kPrefixLength = 3;
    280     static const int kTypeOffset = kPrefixLength;
    281     static const int kValueOffset = kTypeOffset + 1;
    282     static const int kEntrySize = kValueOffset + 1;
    283     static const int8_t array[][kEntrySize];
    284   };
    285 
    286   class TimeZoneComposer BASE_EMBEDDED {
    287    public:
    288     TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
    289     void Set(int offset_in_hours) {
    290       sign_ = offset_in_hours < 0 ? -1 : 1;
    291       hour_ = offset_in_hours * sign_;
    292       minute_ = 0;
    293     }
    294     void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
    295     void SetAbsoluteHour(int hour) { hour_ = hour; }
    296     void SetAbsoluteMinute(int minute) { minute_ = minute; }
    297     bool IsExpecting(int n) const {
    298       return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
    299     }
    300     bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
    301     bool Write(FixedArray* output);
    302     bool IsEmpty() { return hour_ == kNone; }
    303    private:
    304     int sign_;
    305     int hour_;
    306     int minute_;
    307   };
    308 
    309   class TimeComposer BASE_EMBEDDED {
    310    public:
    311     TimeComposer() : index_(0), hour_offset_(kNone) {}
    312     bool IsEmpty() const { return index_ == 0; }
    313     bool IsExpecting(int n) const {
    314       return (index_ == 1 && IsMinute(n)) ||
    315              (index_ == 2 && IsSecond(n)) ||
    316              (index_ == 3 && IsMillisecond(n));
    317     }
    318     bool Add(int n) {
    319       return index_ < kSize ? (comp_[index_++] = n, true) : false;
    320     }
    321     bool AddFinal(int n) {
    322       if (!Add(n)) return false;
    323       while (index_ < kSize) comp_[index_++] = 0;
    324       return true;
    325     }
    326     void SetHourOffset(int n) { hour_offset_ = n; }
    327     bool Write(FixedArray* output);
    328 
    329     static bool IsMinute(int x) { return Between(x, 0, 59); }
    330     static bool IsHour(int x) { return Between(x, 0, 23); }
    331     static bool IsSecond(int x) { return Between(x, 0, 59); }
    332 
    333    private:
    334     static bool IsHour12(int x) { return Between(x, 0, 12); }
    335     static bool IsMillisecond(int x) { return Between(x, 0, 999); }
    336 
    337     static const int kSize = 4;
    338     int comp_[kSize];
    339     int index_;
    340     int hour_offset_;
    341   };
    342 
    343   class DayComposer BASE_EMBEDDED {
    344    public:
    345     DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
    346     bool IsEmpty() const { return index_ == 0; }
    347     bool Add(int n) {
    348       if (index_ < kSize) {
    349         comp_[index_] = n;
    350         index_++;
    351         return true;
    352       }
    353       return false;
    354     }
    355     void SetNamedMonth(int n) { named_month_ = n; }
    356     bool Write(FixedArray* output);
    357     void set_iso_date() { is_iso_date_ = true; }
    358     static bool IsMonth(int x) { return Between(x, 1, 12); }
    359     static bool IsDay(int x) { return Between(x, 1, 31); }
    360 
    361    private:
    362     static const int kSize = 3;
    363     int comp_[kSize];
    364     int index_;
    365     int named_month_;
    366     // If set, ensures that data is always parsed in year-month-date order.
    367     bool is_iso_date_;
    368   };
    369 
  // Tries to parse an ES5 Date Time String. Returns the next token
  // to continue with in the legacy date string parser. If parsing is
  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
  // returns DateToken::Invalid(). Otherwise parsing continues in the
  // legacy parser.
  //
  // 'scanner' supplies the tokens; 'day', 'time' and 'tz' are the composers
  // passed in by the caller.
  // NOTE(review): presumably the composers receive whatever components were
  // recognized before handing over to the legacy parser; the definition is
  // not in this header, so confirm there.
  template <typename Char>
  static DateParser::DateToken ParseES5DateTime(
      DateStringTokenizer<Char>* scanner,
      DayComposer* day,
      TimeComposer* time,
      TimeZoneComposer* tz);
    381 };
    382 
    383 
    384 } }  // namespace v8::internal
    385 
    386 #endif  // V8_DATEPARSER_H_
    387