Home | History | Annotate | Download | only in src
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #ifndef V8_DATEPARSER_H_
     29 #define V8_DATEPARSER_H_
     30 
     31 #include "allocation.h"
     32 #include "char-predicates-inl.h"
     33 
     34 namespace v8 {
     35 namespace internal {
     36 
     37 class DateParser : public AllStatic {
     38  public:
     39   // Parse the string as a date. If parsing succeeds, return true after
     40   // filling out the output array as follows (all integers are Smis):
     41   // [0]: year
     42   // [1]: month (0 = Jan, 1 = Feb, ...)
     43   // [2]: day
     44   // [3]: hour
     45   // [4]: minute
     46   // [5]: second
     47   // [6]: millisecond
     48   // [7]: UTC offset in seconds, or null value if no timezone specified
     49   // If parsing fails, return false (content of output array is not defined).
     50   template <typename Char>
     51   static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
     52 
     53   enum {
     54     YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
     55   };
     56 
     57  private:
     58   // Range testing
     59   static inline bool Between(int x, int lo, int hi) {
     60     return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
     61   }
     62 
     63   // Indicates a missing value.
     64   static const int kNone = kMaxInt;
     65 
     66   // Maximal number of digits used to build the value of a numeral.
     67   // Remaining digits are ignored.
     68   static const int kMaxSignificantDigits = 9;
     69 
     70   // InputReader provides basic string parsing and character classification.
     71   template <typename Char>
     72   class InputReader BASE_EMBEDDED {
     73    public:
     74     InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
     75         : index_(0),
     76           buffer_(s),
     77           unicode_cache_(unicode_cache) {
     78       Next();
     79     }
     80 
     81     int position() { return index_; }
     82 
     83     // Advance to the next character of the string.
     84     void Next() {
     85       ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
     86       index_++;
     87     }
     88 
     89     // Read a string of digits as an unsigned number. Cap value at
     90     // kMaxSignificantDigits, but skip remaining digits if the numeral
     91     // is longer.
     92     int ReadUnsignedNumeral() {
     93       int n = 0;
     94       int i = 0;
     95       while (IsAsciiDigit()) {
     96         if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
     97         i++;
     98         Next();
     99       }
    100       return n;
    101     }
    102 
    103     // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
    104     // lower-case prefix, and pad any remainder of the buffer with zeroes.
    105     // Return word length.
    106     int ReadWord(uint32_t* prefix, int prefix_size) {
    107       int len;
    108       for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
    109         if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
    110       }
    111       for (int i = len; i < prefix_size; i++) prefix[i] = 0;
    112       return len;
    113     }
    114 
    115     // The skip methods return whether they actually skipped something.
    116     bool Skip(uint32_t c) {
    117       if (ch_ == c) {
    118         Next();
    119         return true;
    120       }
    121       return false;
    122     }
    123 
    124     bool SkipWhiteSpace() {
    125       if (unicode_cache_->IsWhiteSpace(ch_)) {
    126         Next();
    127         return true;
    128       }
    129       return false;
    130     }
    131 
    132     bool SkipParentheses() {
    133       if (ch_ != '(') return false;
    134       int balance = 0;
    135       do {
    136         if (ch_ == ')') --balance;
    137         else if (ch_ == '(') ++balance;
    138         Next();
    139       } while (balance > 0 && ch_);
    140       return true;
    141     }
    142 
    143     // Character testing/classification. Non-ASCII digits are not supported.
    144     bool Is(uint32_t c) const { return ch_ == c; }
    145     bool IsEnd() const { return ch_ == 0; }
    146     bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
    147     bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
    148     bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
    149 
    150     // Return 1 for '+' and -1 for '-'.
    151     int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
    152 
    153    private:
    154     int index_;
    155     Vector<Char> buffer_;
    156     uint32_t ch_;
    157     UnicodeCache* unicode_cache_;
    158   };
    159 
    160   enum KeywordType {
    161       INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
    162   };
    163 
    164   struct DateToken {
    165    public:
    166     bool IsInvalid() { return tag_ == kInvalidTokenTag; }
    167     bool IsUnknown() { return tag_ == kUnknownTokenTag; }
    168     bool IsNumber() { return tag_ == kNumberTag; }
    169     bool IsSymbol() { return tag_ == kSymbolTag; }
    170     bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
    171     bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
    172     bool IsKeyword() { return tag_ >= kKeywordTagStart; }
    173 
    174     int length() { return length_; }
    175 
    176     int number() {
    177       ASSERT(IsNumber());
    178       return value_;
    179     }
    180     KeywordType keyword_type() {
    181       ASSERT(IsKeyword());
    182       return static_cast<KeywordType>(tag_);
    183     }
    184     int keyword_value() {
    185       ASSERT(IsKeyword());
    186       return value_;
    187     }
    188     char symbol() {
    189       ASSERT(IsSymbol());
    190       return static_cast<char>(value_);
    191     }
    192     bool IsSymbol(char symbol) {
    193       return IsSymbol() && this->symbol() == symbol;
    194     }
    195     bool IsKeywordType(KeywordType tag) {
    196       return tag_ == tag;
    197     }
    198     bool IsFixedLengthNumber(int length) {
    199       return IsNumber() && length_ == length;
    200     }
    201     bool IsAsciiSign() {
    202       return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
    203     }
    204     int ascii_sign() {
    205       ASSERT(IsAsciiSign());
    206       return 44 - value_;
    207     }
    208     bool IsKeywordZ() {
    209       return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
    210     }
    211     bool IsUnknown(int character) {
    212       return IsUnknown() && value_ == character;
    213     }
    214     // Factory functions.
    215     static DateToken Keyword(KeywordType tag, int value, int length) {
    216       return DateToken(tag, length, value);
    217     }
    218     static DateToken Number(int value, int length) {
    219       return DateToken(kNumberTag, length, value);
    220     }
    221     static DateToken Symbol(char symbol) {
    222       return DateToken(kSymbolTag, 1, symbol);
    223     }
    224     static DateToken EndOfInput() {
    225       return DateToken(kEndOfInputTag, 0, -1);
    226     }
    227     static DateToken WhiteSpace(int length) {
    228       return DateToken(kWhiteSpaceTag, length, -1);
    229     }
    230     static DateToken Unknown() {
    231       return DateToken(kUnknownTokenTag, 1, -1);
    232     }
    233     static DateToken Invalid() {
    234       return DateToken(kInvalidTokenTag, 0, -1);
    235     }
    236 
    237    private:
    238     enum TagType {
    239       kInvalidTokenTag = -6,
    240       kUnknownTokenTag = -5,
    241       kWhiteSpaceTag = -4,
    242       kNumberTag = -3,
    243       kSymbolTag = -2,
    244       kEndOfInputTag = -1,
    245       kKeywordTagStart = 0
    246     };
    247     DateToken(int tag, int length, int value)
    248         : tag_(tag),
    249           length_(length),
    250           value_(value) { }
    251 
    252     int tag_;
    253     int length_;  // Number of characters.
    254     int value_;
    255   };
    256 
    257   template <typename Char>
    258   class DateStringTokenizer {
    259    public:
    260     explicit DateStringTokenizer(InputReader<Char>* in)
    261         : in_(in), next_(Scan()) { }
    262     DateToken Next() {
    263       DateToken result = next_;
    264       next_ = Scan();
    265       return result;
    266     }
    267 
    268     DateToken Peek() {
    269       return next_;
    270     }
    271     bool SkipSymbol(char symbol) {
    272       if (next_.IsSymbol(symbol)) {
    273         next_ = Scan();
    274         return true;
    275       }
    276       return false;
    277     }
    278 
    279    private:
    280     DateToken Scan();
    281 
    282     InputReader<Char>* in_;
    283     DateToken next_;
    284   };
    285 
    286   static int ReadMilliseconds(DateToken number);
    287 
    288   // KeywordTable maps names of months, time zones, am/pm to numbers.
    289   class KeywordTable : public AllStatic {
    290    public:
    291     // Look up a word in the keyword table and return an index.
    292     // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
    293     // and 'len' is the word length.
    294     static int Lookup(const uint32_t* pre, int len);
    295     // Get the type of the keyword at index i.
    296     static KeywordType GetType(int i) {
    297       return static_cast<KeywordType>(array[i][kTypeOffset]);
    298     }
    299     // Get the value of the keyword at index i.
    300     static int GetValue(int i) { return array[i][kValueOffset]; }
    301 
    302     static const int kPrefixLength = 3;
    303     static const int kTypeOffset = kPrefixLength;
    304     static const int kValueOffset = kTypeOffset + 1;
    305     static const int kEntrySize = kValueOffset + 1;
    306     static const int8_t array[][kEntrySize];
    307   };
    308 
    309   class TimeZoneComposer BASE_EMBEDDED {
    310    public:
    311     TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
    312     void Set(int offset_in_hours) {
    313       sign_ = offset_in_hours < 0 ? -1 : 1;
    314       hour_ = offset_in_hours * sign_;
    315       minute_ = 0;
    316     }
    317     void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
    318     void SetAbsoluteHour(int hour) { hour_ = hour; }
    319     void SetAbsoluteMinute(int minute) { minute_ = minute; }
    320     bool IsExpecting(int n) const {
    321       return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
    322     }
    323     bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
    324     bool Write(FixedArray* output);
    325     bool IsEmpty() { return hour_ == kNone; }
    326    private:
    327     int sign_;
    328     int hour_;
    329     int minute_;
    330   };
    331 
    332   class TimeComposer BASE_EMBEDDED {
    333    public:
    334     TimeComposer() : index_(0), hour_offset_(kNone) {}
    335     bool IsEmpty() const { return index_ == 0; }
    336     bool IsExpecting(int n) const {
    337       return (index_ == 1 && IsMinute(n)) ||
    338              (index_ == 2 && IsSecond(n)) ||
    339              (index_ == 3 && IsMillisecond(n));
    340     }
    341     bool Add(int n) {
    342       return index_ < kSize ? (comp_[index_++] = n, true) : false;
    343     }
    344     bool AddFinal(int n) {
    345       if (!Add(n)) return false;
    346       while (index_ < kSize) comp_[index_++] = 0;
    347       return true;
    348     }
    349     void SetHourOffset(int n) { hour_offset_ = n; }
    350     bool Write(FixedArray* output);
    351 
    352     static bool IsMinute(int x) { return Between(x, 0, 59); }
    353     static bool IsHour(int x) { return Between(x, 0, 23); }
    354     static bool IsSecond(int x) { return Between(x, 0, 59); }
    355 
    356    private:
    357     static bool IsHour12(int x) { return Between(x, 0, 12); }
    358     static bool IsMillisecond(int x) { return Between(x, 0, 999); }
    359 
    360     static const int kSize = 4;
    361     int comp_[kSize];
    362     int index_;
    363     int hour_offset_;
    364   };
    365 
    366   class DayComposer BASE_EMBEDDED {
    367    public:
    368     DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
    369     bool IsEmpty() const { return index_ == 0; }
    370     bool Add(int n) {
    371       if (index_ < kSize) {
    372         comp_[index_] = n;
    373         index_++;
    374         return true;
    375       }
    376       return false;
    377     }
    378     void SetNamedMonth(int n) { named_month_ = n; }
    379     bool Write(FixedArray* output);
    380     void set_iso_date() { is_iso_date_ = true; }
    381     static bool IsMonth(int x) { return Between(x, 1, 12); }
    382     static bool IsDay(int x) { return Between(x, 1, 31); }
    383 
    384    private:
    385     static const int kSize = 3;
    386     int comp_[kSize];
    387     int index_;
    388     int named_month_;
    389     // If set, ensures that data is always parsed in year-month-date order.
    390     bool is_iso_date_;
    391   };
    392 
    393   // Tries to parse an ES5 Date Time String. Returns the next token
    394   // to continue with in the legacy date string parser. If parsing is
    395   // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
    396   // returns DateToken::Invalid(). Otherwise parsing continues in the
    397   // legacy parser.
    398   template <typename Char>
    399   static DateParser::DateToken ParseES5DateTime(
    400       DateStringTokenizer<Char>* scanner,
    401       DayComposer* day,
    402       TimeComposer* time,
    403       TimeZoneComposer* tz);
    404 };
    405 
    406 
    407 } }  // namespace v8::internal
    408 
    409 #endif  // V8_DATEPARSER_H_
    410