1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_DATEPARSER_H_ 6 #define V8_DATEPARSER_H_ 7 8 #include "src/allocation.h" 9 #include "src/char-predicates-inl.h" 10 11 namespace v8 { 12 namespace internal { 13 14 class DateParser : public AllStatic { 15 public: 16 // Parse the string as a date. If parsing succeeds, return true after 17 // filling out the output array as follows (all integers are Smis): 18 // [0]: year 19 // [1]: month (0 = Jan, 1 = Feb, ...) 20 // [2]: day 21 // [3]: hour 22 // [4]: minute 23 // [5]: second 24 // [6]: millisecond 25 // [7]: UTC offset in seconds, or null value if no timezone specified 26 // If parsing fails, return false (content of output array is not defined). 27 template <typename Char> 28 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache); 29 30 enum { 31 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE 32 }; 33 34 private: 35 // Range testing 36 static inline bool Between(int x, int lo, int hi) { 37 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); 38 } 39 40 // Indicates a missing value. 41 static const int kNone = kMaxInt; 42 43 // Maximal number of digits used to build the value of a numeral. 44 // Remaining digits are ignored. 45 static const int kMaxSignificantDigits = 9; 46 47 // InputReader provides basic string parsing and character classification. 48 template <typename Char> 49 class InputReader BASE_EMBEDDED { 50 public: 51 InputReader(UnicodeCache* unicode_cache, Vector<Char> s) 52 : index_(0), 53 buffer_(s), 54 unicode_cache_(unicode_cache) { 55 Next(); 56 } 57 58 int position() { return index_; } 59 60 // Advance to the next character of the string. 61 void Next() { 62 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; 63 index_++; 64 } 65 66 // Read a string of digits as an unsigned number. Cap value at 67 // kMaxSignificantDigits, but skip remaining digits if the numeral 68 // is longer. 69 int ReadUnsignedNumeral() { 70 int n = 0; 71 int i = 0; 72 while (IsAsciiDigit()) { 73 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; 74 i++; 75 Next(); 76 } 77 return n; 78 } 79 80 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a 81 // lower-case prefix, and pad any remainder of the buffer with zeroes. 82 // Return word length. 83 int ReadWord(uint32_t* prefix, int prefix_size) { 84 int len; 85 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { 86 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); 87 } 88 for (int i = len; i < prefix_size; i++) prefix[i] = 0; 89 return len; 90 } 91 92 // The skip methods return whether they actually skipped something. 93 bool Skip(uint32_t c) { 94 if (ch_ == c) { 95 Next(); 96 return true; 97 } 98 return false; 99 } 100 101 bool SkipWhiteSpace() { 102 if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) { 103 Next(); 104 return true; 105 } 106 return false; 107 } 108 109 bool SkipParentheses() { 110 if (ch_ != '(') return false; 111 int balance = 0; 112 do { 113 if (ch_ == ')') --balance; 114 else if (ch_ == '(') ++balance; 115 Next(); 116 } while (balance > 0 && ch_); 117 return true; 118 } 119 120 // Character testing/classification. Non-ASCII digits are not supported. 121 bool Is(uint32_t c) const { return ch_ == c; } 122 bool IsEnd() const { return ch_ == 0; } 123 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } 124 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } 125 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } 126 127 // Return 1 for '+' and -1 for '-'. 128 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } 129 130 private: 131 int index_; 132 Vector<Char> buffer_; 133 uint32_t ch_; 134 UnicodeCache* unicode_cache_; 135 }; 136 137 enum KeywordType { 138 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM 139 }; 140 141 struct DateToken { 142 public: 143 bool IsInvalid() { return tag_ == kInvalidTokenTag; } 144 bool IsUnknown() { return tag_ == kUnknownTokenTag; } 145 bool IsNumber() { return tag_ == kNumberTag; } 146 bool IsSymbol() { return tag_ == kSymbolTag; } 147 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } 148 bool IsEndOfInput() { return tag_ == kEndOfInputTag; } 149 bool IsKeyword() { return tag_ >= kKeywordTagStart; } 150 151 int length() { return length_; } 152 153 int number() { 154 ASSERT(IsNumber()); 155 return value_; 156 } 157 KeywordType keyword_type() { 158 ASSERT(IsKeyword()); 159 return static_cast<KeywordType>(tag_); 160 } 161 int keyword_value() { 162 ASSERT(IsKeyword()); 163 return value_; 164 } 165 char symbol() { 166 ASSERT(IsSymbol()); 167 return static_cast<char>(value_); 168 } 169 bool IsSymbol(char symbol) { 170 return IsSymbol() && this->symbol() == symbol; 171 } 172 bool IsKeywordType(KeywordType tag) { 173 return tag_ == tag; 174 } 175 bool IsFixedLengthNumber(int length) { 176 return IsNumber() && length_ == length; 177 } 178 bool IsAsciiSign() { 179 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); 180 } 181 int ascii_sign() { 182 ASSERT(IsAsciiSign()); 183 return 44 - value_; 184 } 185 bool IsKeywordZ() { 186 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; 187 } 188 bool IsUnknown(int character) { 189 return IsUnknown() && value_ == character; 190 } 191 // Factory functions. 192 static DateToken Keyword(KeywordType tag, int value, int length) { 193 return DateToken(tag, length, value); 194 } 195 static DateToken Number(int value, int length) { 196 return DateToken(kNumberTag, length, value); 197 } 198 static DateToken Symbol(char symbol) { 199 return DateToken(kSymbolTag, 1, symbol); 200 } 201 static DateToken EndOfInput() { 202 return DateToken(kEndOfInputTag, 0, -1); 203 } 204 static DateToken WhiteSpace(int length) { 205 return DateToken(kWhiteSpaceTag, length, -1); 206 } 207 static DateToken Unknown() { 208 return DateToken(kUnknownTokenTag, 1, -1); 209 } 210 static DateToken Invalid() { 211 return DateToken(kInvalidTokenTag, 0, -1); 212 } 213 214 private: 215 enum TagType { 216 kInvalidTokenTag = -6, 217 kUnknownTokenTag = -5, 218 kWhiteSpaceTag = -4, 219 kNumberTag = -3, 220 kSymbolTag = -2, 221 kEndOfInputTag = -1, 222 kKeywordTagStart = 0 223 }; 224 DateToken(int tag, int length, int value) 225 : tag_(tag), 226 length_(length), 227 value_(value) { } 228 229 int tag_; 230 int length_; // Number of characters. 231 int value_; 232 }; 233 234 template <typename Char> 235 class DateStringTokenizer { 236 public: 237 explicit DateStringTokenizer(InputReader<Char>* in) 238 : in_(in), next_(Scan()) { } 239 DateToken Next() { 240 DateToken result = next_; 241 next_ = Scan(); 242 return result; 243 } 244 245 DateToken Peek() { 246 return next_; 247 } 248 bool SkipSymbol(char symbol) { 249 if (next_.IsSymbol(symbol)) { 250 next_ = Scan(); 251 return true; 252 } 253 return false; 254 } 255 256 private: 257 DateToken Scan(); 258 259 InputReader<Char>* in_; 260 DateToken next_; 261 }; 262 263 static int ReadMilliseconds(DateToken number); 264 265 // KeywordTable maps names of months, time zones, am/pm to numbers. 266 class KeywordTable : public AllStatic { 267 public: 268 // Look up a word in the keyword table and return an index. 269 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength 270 // and 'len' is the word length. 271 static int Lookup(const uint32_t* pre, int len); 272 // Get the type of the keyword at index i. 273 static KeywordType GetType(int i) { 274 return static_cast<KeywordType>(array[i][kTypeOffset]); 275 } 276 // Get the value of the keyword at index i. 277 static int GetValue(int i) { return array[i][kValueOffset]; } 278 279 static const int kPrefixLength = 3; 280 static const int kTypeOffset = kPrefixLength; 281 static const int kValueOffset = kTypeOffset + 1; 282 static const int kEntrySize = kValueOffset + 1; 283 static const int8_t array[][kEntrySize]; 284 }; 285 286 class TimeZoneComposer BASE_EMBEDDED { 287 public: 288 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {} 289 void Set(int offset_in_hours) { 290 sign_ = offset_in_hours < 0 ? -1 : 1; 291 hour_ = offset_in_hours * sign_; 292 minute_ = 0; 293 } 294 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } 295 void SetAbsoluteHour(int hour) { hour_ = hour; } 296 void SetAbsoluteMinute(int minute) { minute_ = minute; } 297 bool IsExpecting(int n) const { 298 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); 299 } 300 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } 301 bool Write(FixedArray* output); 302 bool IsEmpty() { return hour_ == kNone; } 303 private: 304 int sign_; 305 int hour_; 306 int minute_; 307 }; 308 309 class TimeComposer BASE_EMBEDDED { 310 public: 311 TimeComposer() : index_(0), hour_offset_(kNone) {} 312 bool IsEmpty() const { return index_ == 0; } 313 bool IsExpecting(int n) const { 314 return (index_ == 1 && IsMinute(n)) || 315 (index_ == 2 && IsSecond(n)) || 316 (index_ == 3 && IsMillisecond(n)); 317 } 318 bool Add(int n) { 319 return index_ < kSize ? (comp_[index_++] = n, true) : false; 320 } 321 bool AddFinal(int n) { 322 if (!Add(n)) return false; 323 while (index_ < kSize) comp_[index_++] = 0; 324 return true; 325 } 326 void SetHourOffset(int n) { hour_offset_ = n; } 327 bool Write(FixedArray* output); 328 329 static bool IsMinute(int x) { return Between(x, 0, 59); } 330 static bool IsHour(int x) { return Between(x, 0, 23); } 331 static bool IsSecond(int x) { return Between(x, 0, 59); } 332 333 private: 334 static bool IsHour12(int x) { return Between(x, 0, 12); } 335 static bool IsMillisecond(int x) { return Between(x, 0, 999); } 336 337 static const int kSize = 4; 338 int comp_[kSize]; 339 int index_; 340 int hour_offset_; 341 }; 342 343 class DayComposer BASE_EMBEDDED { 344 public: 345 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} 346 bool IsEmpty() const { return index_ == 0; } 347 bool Add(int n) { 348 if (index_ < kSize) { 349 comp_[index_] = n; 350 index_++; 351 return true; 352 } 353 return false; 354 } 355 void SetNamedMonth(int n) { named_month_ = n; } 356 bool Write(FixedArray* output); 357 void set_iso_date() { is_iso_date_ = true; } 358 static bool IsMonth(int x) { return Between(x, 1, 12); } 359 static bool IsDay(int x) { return Between(x, 1, 31); } 360 361 private: 362 static const int kSize = 3; 363 int comp_[kSize]; 364 int index_; 365 int named_month_; 366 // If set, ensures that data is always parsed in year-month-date order. 367 bool is_iso_date_; 368 }; 369 370 // Tries to parse an ES5 Date Time String. Returns the next token 371 // to continue with in the legacy date string parser. If parsing is 372 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful, 373 // returns DateToken::Invalid(). Otherwise parsing continues in the 374 // legacy parser. 375 template <typename Char> 376 static DateParser::DateToken ParseES5DateTime( 377 DateStringTokenizer<Char>* scanner, 378 DayComposer* day, 379 TimeComposer* time, 380 TimeZoneComposer* tz); 381 }; 382 383 384 } } // namespace v8::internal 385 386 #endif // V8_DATEPARSER_H_ 387