Home | History | Annotate | Download | only in src
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_DATEPARSER_H_
      6 #define V8_DATEPARSER_H_
      7 
      8 #include "src/allocation.h"
      9 #include "src/char-predicates.h"
     10 #include "src/unicode-cache.h"
     11 
     12 namespace v8 {
     13 namespace internal {
     14 
     15 class DateParser : public AllStatic {
     16  public:
  // Parse the string as a date. If parsing succeeds, return true after
  // filling out the output array as follows (all integers are Smis):
  // [0]: year
  // [1]: month (0 = Jan, 1 = Feb, ...)
  // [2]: day
  // [3]: hour
  // [4]: minute
  // [5]: second
  // [6]: millisecond
  // [7]: UTC offset in seconds, or null value if no timezone specified
  // If parsing fails, return false (content of output array is not defined).
  // The slot numbers above are listed in the same order as the enumerators
  // YEAR .. UTC_OFFSET declared in this class.
  // The function is only declared here; it is a template over the character
  // type of the input vector.
  template <typename Char>
  static bool Parse(Isolate* isolate, Vector<Char> str, FixedArray* output);
     30 
     31   enum {
     32     YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
     33   };
     34 
     35  private:
     36   // Range testing
     37   static inline bool Between(int x, int lo, int hi) {
     38     return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
     39   }
     40 
  // Indicates a missing value. The composer classes in this header
  // initialize their optional fields to kNone and compare against it to
  // detect fields that have not been set.
  // NOTE(review): kMaxInt is defined outside this header.
  static const int kNone = kMaxInt;
     43 
  // Maximal number of digits used to build the value of a numeral.
  // Remaining digits are ignored: InputReader::ReadUnsignedNumeral() still
  // consumes them, but they do not contribute to the returned value. With
  // nine digits the accumulated value is at most 999999999.
  static const int kMaxSignificantDigits = 9;
     47 
     48   // InputReader provides basic string parsing and character classification.
     49   template <typename Char>
     50   class InputReader BASE_EMBEDDED {
     51    public:
     52     InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
     53         : index_(0),
     54           buffer_(s),
     55           unicode_cache_(unicode_cache) {
     56       Next();
     57     }
     58 
     59     int position() { return index_; }
     60 
     61     // Advance to the next character of the string.
     62     void Next() {
     63       ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
     64       index_++;
     65     }
     66 
     67     // Read a string of digits as an unsigned number. Cap value at
     68     // kMaxSignificantDigits, but skip remaining digits if the numeral
     69     // is longer.
     70     int ReadUnsignedNumeral() {
     71       int n = 0;
     72       int i = 0;
     73       while (IsAsciiDigit()) {
     74         if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
     75         i++;
     76         Next();
     77       }
     78       return n;
     79     }
     80 
     81     // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
     82     // lower-case prefix, and pad any remainder of the buffer with zeroes.
     83     // Return word length.
     84     int ReadWord(uint32_t* prefix, int prefix_size) {
     85       int len;
     86       for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
     87         if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
     88       }
     89       for (int i = len; i < prefix_size; i++) prefix[i] = 0;
     90       return len;
     91     }
     92 
     93     // The skip methods return whether they actually skipped something.
     94     bool Skip(uint32_t c) {
     95       if (ch_ == c) {
     96         Next();
     97         return true;
     98       }
     99       return false;
    100     }
    101 
    102     inline bool SkipWhiteSpace();
    103     inline bool SkipParentheses();
    104 
    105     // Character testing/classification. Non-ASCII digits are not supported.
    106     bool Is(uint32_t c) const { return ch_ == c; }
    107     bool IsEnd() const { return ch_ == 0; }
    108     bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
    109     bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
    110     bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
    111 
    112     // Return 1 for '+' and -1 for '-'.
    113     int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
    114 
    115    private:
    116     int index_;
    117     Vector<Char> buffer_;
    118     uint32_t ch_;
    119     UnicodeCache* unicode_cache_;
    120   };
    121 
    122   enum KeywordType {
    123       INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
    124   };
    125 
    126   struct DateToken {
    127    public:
    128     bool IsInvalid() { return tag_ == kInvalidTokenTag; }
    129     bool IsUnknown() { return tag_ == kUnknownTokenTag; }
    130     bool IsNumber() { return tag_ == kNumberTag; }
    131     bool IsSymbol() { return tag_ == kSymbolTag; }
    132     bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
    133     bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
    134     bool IsKeyword() { return tag_ >= kKeywordTagStart; }
    135 
    136     int length() { return length_; }
    137 
    138     int number() {
    139       DCHECK(IsNumber());
    140       return value_;
    141     }
    142     KeywordType keyword_type() {
    143       DCHECK(IsKeyword());
    144       return static_cast<KeywordType>(tag_);
    145     }
    146     int keyword_value() {
    147       DCHECK(IsKeyword());
    148       return value_;
    149     }
    150     char symbol() {
    151       DCHECK(IsSymbol());
    152       return static_cast<char>(value_);
    153     }
    154     bool IsSymbol(char symbol) {
    155       return IsSymbol() && this->symbol() == symbol;
    156     }
    157     bool IsKeywordType(KeywordType tag) {
    158       return tag_ == tag;
    159     }
    160     bool IsFixedLengthNumber(int length) {
    161       return IsNumber() && length_ == length;
    162     }
    163     bool IsAsciiSign() {
    164       return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
    165     }
    166     int ascii_sign() {
    167       DCHECK(IsAsciiSign());
    168       return 44 - value_;
    169     }
    170     bool IsKeywordZ() {
    171       return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
    172     }
    173     bool IsUnknown(int character) {
    174       return IsUnknown() && value_ == character;
    175     }
    176     // Factory functions.
    177     static DateToken Keyword(KeywordType tag, int value, int length) {
    178       return DateToken(tag, length, value);
    179     }
    180     static DateToken Number(int value, int length) {
    181       return DateToken(kNumberTag, length, value);
    182     }
    183     static DateToken Symbol(char symbol) {
    184       return DateToken(kSymbolTag, 1, symbol);
    185     }
    186     static DateToken EndOfInput() {
    187       return DateToken(kEndOfInputTag, 0, -1);
    188     }
    189     static DateToken WhiteSpace(int length) {
    190       return DateToken(kWhiteSpaceTag, length, -1);
    191     }
    192     static DateToken Unknown() {
    193       return DateToken(kUnknownTokenTag, 1, -1);
    194     }
    195     static DateToken Invalid() {
    196       return DateToken(kInvalidTokenTag, 0, -1);
    197     }
    198 
    199    private:
    200     enum TagType {
    201       kInvalidTokenTag = -6,
    202       kUnknownTokenTag = -5,
    203       kWhiteSpaceTag = -4,
    204       kNumberTag = -3,
    205       kSymbolTag = -2,
    206       kEndOfInputTag = -1,
    207       kKeywordTagStart = 0
    208     };
    209     DateToken(int tag, int length, int value)
    210         : tag_(tag),
    211           length_(length),
    212           value_(value) { }
    213 
    214     int tag_;
    215     int length_;  // Number of characters.
    216     int value_;
    217   };
    218 
    219   template <typename Char>
    220   class DateStringTokenizer {
    221    public:
    222     explicit DateStringTokenizer(InputReader<Char>* in)
    223         : in_(in), next_(Scan()) { }
    224     DateToken Next() {
    225       DateToken result = next_;
    226       next_ = Scan();
    227       return result;
    228     }
    229 
    230     DateToken Peek() {
    231       return next_;
    232     }
    233     bool SkipSymbol(char symbol) {
    234       if (next_.IsSymbol(symbol)) {
    235         next_ = Scan();
    236         return true;
    237       }
    238       return false;
    239     }
    240 
    241    private:
    242     DateToken Scan();
    243 
    244     InputReader<Char>* in_;
    245     DateToken next_;
    246   };
    247 
  // Takes a token by value and returns an int; only declared here.
  // NOTE(review): presumably converts a number token into a millisecond
  // value -- confirm against the definition.
  static int ReadMilliseconds(DateToken number);
    249 
    250   // KeywordTable maps names of months, time zones, am/pm to numbers.
    251   class KeywordTable : public AllStatic {
    252    public:
    253     // Look up a word in the keyword table and return an index.
    254     // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
    255     // and 'len' is the word length.
    256     static int Lookup(const uint32_t* pre, int len);
    257     // Get the type of the keyword at index i.
    258     static KeywordType GetType(int i) {
    259       return static_cast<KeywordType>(array[i][kTypeOffset]);
    260     }
    261     // Get the value of the keyword at index i.
    262     static int GetValue(int i) { return array[i][kValueOffset]; }
    263 
    264     static const int kPrefixLength = 3;
    265     static const int kTypeOffset = kPrefixLength;
    266     static const int kValueOffset = kTypeOffset + 1;
    267     static const int kEntrySize = kValueOffset + 1;
    268     static const int8_t array[][kEntrySize];
    269   };
    270 
    271   class TimeZoneComposer BASE_EMBEDDED {
    272    public:
    273     TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
    274     void Set(int offset_in_hours) {
    275       sign_ = offset_in_hours < 0 ? -1 : 1;
    276       hour_ = offset_in_hours * sign_;
    277       minute_ = 0;
    278     }
    279     void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
    280     void SetAbsoluteHour(int hour) { hour_ = hour; }
    281     void SetAbsoluteMinute(int minute) { minute_ = minute; }
    282     bool IsExpecting(int n) const {
    283       return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
    284     }
    285     bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
    286     bool Write(FixedArray* output);
    287     bool IsEmpty() { return hour_ == kNone; }
    288    private:
    289     int sign_;
    290     int hour_;
    291     int minute_;
    292   };
    293 
    294   class TimeComposer BASE_EMBEDDED {
    295    public:
    296     TimeComposer() : index_(0), hour_offset_(kNone) {}
    297     bool IsEmpty() const { return index_ == 0; }
    298     bool IsExpecting(int n) const {
    299       return (index_ == 1 && IsMinute(n)) ||
    300              (index_ == 2 && IsSecond(n)) ||
    301              (index_ == 3 && IsMillisecond(n));
    302     }
    303     bool Add(int n) {
    304       return index_ < kSize ? (comp_[index_++] = n, true) : false;
    305     }
    306     bool AddFinal(int n) {
    307       if (!Add(n)) return false;
    308       while (index_ < kSize) comp_[index_++] = 0;
    309       return true;
    310     }
    311     void SetHourOffset(int n) { hour_offset_ = n; }
    312     bool Write(FixedArray* output);
    313 
    314     static bool IsMinute(int x) { return Between(x, 0, 59); }
    315     static bool IsHour(int x) { return Between(x, 0, 23); }
    316     static bool IsSecond(int x) { return Between(x, 0, 59); }
    317 
    318    private:
    319     static bool IsHour12(int x) { return Between(x, 0, 12); }
    320     static bool IsMillisecond(int x) { return Between(x, 0, 999); }
    321 
    322     static const int kSize = 4;
    323     int comp_[kSize];
    324     int index_;
    325     int hour_offset_;
    326   };
    327 
    328   class DayComposer BASE_EMBEDDED {
    329    public:
    330     DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
    331     bool IsEmpty() const { return index_ == 0; }
    332     bool Add(int n) {
    333       if (index_ < kSize) {
    334         comp_[index_] = n;
    335         index_++;
    336         return true;
    337       }
    338       return false;
    339     }
    340     void SetNamedMonth(int n) { named_month_ = n; }
    341     bool Write(FixedArray* output);
    342     void set_iso_date() { is_iso_date_ = true; }
    343     static bool IsMonth(int x) { return Between(x, 1, 12); }
    344     static bool IsDay(int x) { return Between(x, 1, 31); }
    345 
    346    private:
    347     static const int kSize = 3;
    348     int comp_[kSize];
    349     int index_;
    350     int named_month_;
    351     // If set, ensures that data is always parsed in year-month-date order.
    352     bool is_iso_date_;
    353   };
    354 
  // Tries to parse an ES5 Date Time String. Returns the next token
  // to continue with in the legacy date string parser. If parsing is
  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
  // returns DateToken::Invalid(). Otherwise parsing continues in the
  // legacy parser.
  // Tokens are taken from |scanner|.
  // NOTE(review): |day|, |time| and |tz| presumably receive the parsed
  // components -- confirm against the definition, which is not in this
  // header.
  template <typename Char>
  static DateParser::DateToken ParseES5DateTime(
      DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
      TimeZoneComposer* tz);
    364 };
    365 
    366 
    367 }  // namespace internal
    368 }  // namespace v8
    369 
    370 #endif  // V8_DATEPARSER_H_
    371