1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 #ifndef V8_DATEPARSER_H_ 29 #define V8_DATEPARSER_H_ 30 31 #include "allocation.h" 32 #include "char-predicates-inl.h" 33 34 namespace v8 { 35 namespace internal { 36 37 class DateParser : public AllStatic { 38 public: 39 // Parse the string as a date. If parsing succeeds, return true after 40 // filling out the output array as follows (all integers are Smis): 41 // [0]: year 42 // [1]: month (0 = Jan, 1 = Feb, ...) 43 // [2]: day 44 // [3]: hour 45 // [4]: minute 46 // [5]: second 47 // [6]: millisecond 48 // [7]: UTC offset in seconds, or null value if no timezone specified 49 // If parsing fails, return false (content of output array is not defined). 50 template <typename Char> 51 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache); 52 53 enum { 54 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE 55 }; 56 57 private: 58 // Range testing 59 static inline bool Between(int x, int lo, int hi) { 60 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); 61 } 62 63 // Indicates a missing value. 64 static const int kNone = kMaxInt; 65 66 // Maximal number of digits used to build the value of a numeral. 67 // Remaining digits are ignored. 68 static const int kMaxSignificantDigits = 9; 69 70 // InputReader provides basic string parsing and character classification. 71 template <typename Char> 72 class InputReader BASE_EMBEDDED { 73 public: 74 InputReader(UnicodeCache* unicode_cache, Vector<Char> s) 75 : index_(0), 76 buffer_(s), 77 unicode_cache_(unicode_cache) { 78 Next(); 79 } 80 81 int position() { return index_; } 82 83 // Advance to the next character of the string. 84 void Next() { 85 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; 86 index_++; 87 } 88 89 // Read a string of digits as an unsigned number. Cap value at 90 // kMaxSignificantDigits, but skip remaining digits if the numeral 91 // is longer. 92 int ReadUnsignedNumeral() { 93 int n = 0; 94 int i = 0; 95 while (IsAsciiDigit()) { 96 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; 97 i++; 98 Next(); 99 } 100 return n; 101 } 102 103 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a 104 // lower-case prefix, and pad any remainder of the buffer with zeroes. 105 // Return word length. 106 int ReadWord(uint32_t* prefix, int prefix_size) { 107 int len; 108 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) { 109 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); 110 } 111 for (int i = len; i < prefix_size; i++) prefix[i] = 0; 112 return len; 113 } 114 115 // The skip methods return whether they actually skipped something. 116 bool Skip(uint32_t c) { 117 if (ch_ == c) { 118 Next(); 119 return true; 120 } 121 return false; 122 } 123 124 bool SkipWhiteSpace() { 125 if (unicode_cache_->IsWhiteSpace(ch_)) { 126 Next(); 127 return true; 128 } 129 return false; 130 } 131 132 bool SkipParentheses() { 133 if (ch_ != '(') return false; 134 int balance = 0; 135 do { 136 if (ch_ == ')') --balance; 137 else if (ch_ == '(') ++balance; 138 Next(); 139 } while (balance > 0 && ch_); 140 return true; 141 } 142 143 // Character testing/classification. Non-ASCII digits are not supported. 144 bool Is(uint32_t c) const { return ch_ == c; } 145 bool IsEnd() const { return ch_ == 0; } 146 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } 147 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } 148 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } 149 150 // Return 1 for '+' and -1 for '-'. 151 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } 152 153 private: 154 int index_; 155 Vector<Char> buffer_; 156 uint32_t ch_; 157 UnicodeCache* unicode_cache_; 158 }; 159 160 enum KeywordType { 161 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM 162 }; 163 164 struct DateToken { 165 public: 166 bool IsInvalid() { return tag_ == kInvalidTokenTag; } 167 bool IsUnknown() { return tag_ == kUnknownTokenTag; } 168 bool IsNumber() { return tag_ == kNumberTag; } 169 bool IsSymbol() { return tag_ == kSymbolTag; } 170 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } 171 bool IsEndOfInput() { return tag_ == kEndOfInputTag; } 172 bool IsKeyword() { return tag_ >= kKeywordTagStart; } 173 174 int length() { return length_; } 175 176 int number() { 177 ASSERT(IsNumber()); 178 return value_; 179 } 180 KeywordType keyword_type() { 181 ASSERT(IsKeyword()); 182 return static_cast<KeywordType>(tag_); 183 } 184 int keyword_value() { 185 ASSERT(IsKeyword()); 186 return value_; 187 } 188 char symbol() { 189 ASSERT(IsSymbol()); 190 return static_cast<char>(value_); 191 } 192 bool IsSymbol(char symbol) { 193 return IsSymbol() && this->symbol() == symbol; 194 } 195 bool IsKeywordType(KeywordType tag) { 196 return tag_ == tag; 197 } 198 bool IsFixedLengthNumber(int length) { 199 return IsNumber() && length_ == length; 200 } 201 bool IsAsciiSign() { 202 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); 203 } 204 int ascii_sign() { 205 ASSERT(IsAsciiSign()); 206 return 44 - value_; 207 } 208 bool IsKeywordZ() { 209 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; 210 } 211 bool IsUnknown(int character) { 212 return IsUnknown() && value_ == character; 213 } 214 // Factory functions. 215 static DateToken Keyword(KeywordType tag, int value, int length) { 216 return DateToken(tag, length, value); 217 } 218 static DateToken Number(int value, int length) { 219 return DateToken(kNumberTag, length, value); 220 } 221 static DateToken Symbol(char symbol) { 222 return DateToken(kSymbolTag, 1, symbol); 223 } 224 static DateToken EndOfInput() { 225 return DateToken(kEndOfInputTag, 0, -1); 226 } 227 static DateToken WhiteSpace(int length) { 228 return DateToken(kWhiteSpaceTag, length, -1); 229 } 230 static DateToken Unknown() { 231 return DateToken(kUnknownTokenTag, 1, -1); 232 } 233 static DateToken Invalid() { 234 return DateToken(kInvalidTokenTag, 0, -1); 235 } 236 237 private: 238 enum TagType { 239 kInvalidTokenTag = -6, 240 kUnknownTokenTag = -5, 241 kWhiteSpaceTag = -4, 242 kNumberTag = -3, 243 kSymbolTag = -2, 244 kEndOfInputTag = -1, 245 kKeywordTagStart = 0 246 }; 247 DateToken(int tag, int length, int value) 248 : tag_(tag), 249 length_(length), 250 value_(value) { } 251 252 int tag_; 253 int length_; // Number of characters. 254 int value_; 255 }; 256 257 template <typename Char> 258 class DateStringTokenizer { 259 public: 260 explicit DateStringTokenizer(InputReader<Char>* in) 261 : in_(in), next_(Scan()) { } 262 DateToken Next() { 263 DateToken result = next_; 264 next_ = Scan(); 265 return result; 266 } 267 268 DateToken Peek() { 269 return next_; 270 } 271 bool SkipSymbol(char symbol) { 272 if (next_.IsSymbol(symbol)) { 273 next_ = Scan(); 274 return true; 275 } 276 return false; 277 } 278 279 private: 280 DateToken Scan(); 281 282 InputReader<Char>* in_; 283 DateToken next_; 284 }; 285 286 static int ReadMilliseconds(DateToken number); 287 288 // KeywordTable maps names of months, time zones, am/pm to numbers. 289 class KeywordTable : public AllStatic { 290 public: 291 // Look up a word in the keyword table and return an index. 292 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength 293 // and 'len' is the word length. 294 static int Lookup(const uint32_t* pre, int len); 295 // Get the type of the keyword at index i. 296 static KeywordType GetType(int i) { 297 return static_cast<KeywordType>(array[i][kTypeOffset]); 298 } 299 // Get the value of the keyword at index i. 300 static int GetValue(int i) { return array[i][kValueOffset]; } 301 302 static const int kPrefixLength = 3; 303 static const int kTypeOffset = kPrefixLength; 304 static const int kValueOffset = kTypeOffset + 1; 305 static const int kEntrySize = kValueOffset + 1; 306 static const int8_t array[][kEntrySize]; 307 }; 308 309 class TimeZoneComposer BASE_EMBEDDED { 310 public: 311 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {} 312 void Set(int offset_in_hours) { 313 sign_ = offset_in_hours < 0 ? -1 : 1; 314 hour_ = offset_in_hours * sign_; 315 minute_ = 0; 316 } 317 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } 318 void SetAbsoluteHour(int hour) { hour_ = hour; } 319 void SetAbsoluteMinute(int minute) { minute_ = minute; } 320 bool IsExpecting(int n) const { 321 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); 322 } 323 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } 324 bool Write(FixedArray* output); 325 bool IsEmpty() { return hour_ == kNone; } 326 private: 327 int sign_; 328 int hour_; 329 int minute_; 330 }; 331 332 class TimeComposer BASE_EMBEDDED { 333 public: 334 TimeComposer() : index_(0), hour_offset_(kNone) {} 335 bool IsEmpty() const { return index_ == 0; } 336 bool IsExpecting(int n) const { 337 return (index_ == 1 && IsMinute(n)) || 338 (index_ == 2 && IsSecond(n)) || 339 (index_ == 3 && IsMillisecond(n)); 340 } 341 bool Add(int n) { 342 return index_ < kSize ? (comp_[index_++] = n, true) : false; 343 } 344 bool AddFinal(int n) { 345 if (!Add(n)) return false; 346 while (index_ < kSize) comp_[index_++] = 0; 347 return true; 348 } 349 void SetHourOffset(int n) { hour_offset_ = n; } 350 bool Write(FixedArray* output); 351 352 static bool IsMinute(int x) { return Between(x, 0, 59); } 353 static bool IsHour(int x) { return Between(x, 0, 23); } 354 static bool IsSecond(int x) { return Between(x, 0, 59); } 355 356 private: 357 static bool IsHour12(int x) { return Between(x, 0, 12); } 358 static bool IsMillisecond(int x) { return Between(x, 0, 999); } 359 360 static const int kSize = 4; 361 int comp_[kSize]; 362 int index_; 363 int hour_offset_; 364 }; 365 366 class DayComposer BASE_EMBEDDED { 367 public: 368 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} 369 bool IsEmpty() const { return index_ == 0; } 370 bool Add(int n) { 371 if (index_ < kSize) { 372 comp_[index_] = n; 373 index_++; 374 return true; 375 } 376 return false; 377 } 378 void SetNamedMonth(int n) { named_month_ = n; } 379 bool Write(FixedArray* output); 380 void set_iso_date() { is_iso_date_ = true; } 381 static bool IsMonth(int x) { return Between(x, 1, 12); } 382 static bool IsDay(int x) { return Between(x, 1, 31); } 383 384 private: 385 static const int kSize = 3; 386 int comp_[kSize]; 387 int index_; 388 int named_month_; 389 // If set, ensures that data is always parsed in year-month-date order. 390 bool is_iso_date_; 391 }; 392 393 // Tries to parse an ES5 Date Time String. Returns the next token 394 // to continue with in the legacy date string parser. If parsing is 395 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful, 396 // returns DateToken::Invalid(). Otherwise parsing continues in the 397 // legacy parser. 398 template <typename Char> 399 static DateParser::DateToken ParseES5DateTime( 400 DateStringTokenizer<Char>* scanner, 401 DayComposer* day, 402 TimeComposer* time, 403 TimeZoneComposer* tz); 404 }; 405 406 407 } } // namespace v8::internal 408 409 #endif // V8_DATEPARSER_H_ 410