1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_CHAR_PREDICATES_H_ 6 #define V8_CHAR_PREDICATES_H_ 7 8 #include "src/unicode.h" 9 10 namespace v8 { 11 namespace internal { 12 13 // Unicode character predicates as defined by ECMA-262, 3rd, 14 // used for lexical analysis. 15 16 inline int AsciiAlphaToLower(uc32 c); 17 inline bool IsCarriageReturn(uc32 c); 18 inline bool IsLineFeed(uc32 c); 19 inline bool IsAsciiIdentifier(uc32 c); 20 inline bool IsAlphaNumeric(uc32 c); 21 inline bool IsDecimalDigit(uc32 c); 22 inline bool IsHexDigit(uc32 c); 23 inline bool IsOctalDigit(uc32 c); 24 inline bool IsBinaryDigit(uc32 c); 25 inline bool IsRegExpWord(uc32 c); 26 inline bool IsRegExpNewline(uc32 c); 27 28 29 struct SupplementaryPlanes { 30 static bool IsIDStart(uc32 c); 31 static bool IsIDPart(uc32 c); 32 }; 33 34 35 // ES6 draft section 11.6 36 // This includes '_', '$' and '\', and ID_Start according to 37 // http://www.unicode.org/reports/tr31/, which consists of categories 38 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties 39 // 'Pattern_Syntax' or 'Pattern_White_Space'. 40 // For code points in the SMPs, we can resort to ICU (if available). 41 struct IdentifierStart { 42 static inline bool Is(uc32 c) { 43 if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c); 44 return unibrow::ID_Start::Is(c); 45 } 46 }; 47 48 49 // ES6 draft section 11.6 50 // This includes \u200c and \u200d, and ID_Continue according to 51 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, 52 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties 53 // 'Pattern_Syntax' or 'Pattern_White_Space'. 54 // For code points in the SMPs, we can resort to ICU (if available). 55 struct IdentifierPart { 56 static inline bool Is(uc32 c) { 57 if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c); 58 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); 59 } 60 }; 61 62 63 // ES6 draft section 11.2 64 // This includes all code points of Unicode category 'Zs'. 65 // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, 66 // so it is also included. 67 // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. 68 // There are no category 'Zs' code points in the SMPs. 69 struct WhiteSpace { 70 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } 71 }; 72 73 74 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 75 // This consists of \000a, \000d, \u2028, and \u2029. 76 struct WhiteSpaceOrLineTerminator { 77 static inline bool Is(uc32 c) { 78 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); 79 } 80 }; 81 82 } // namespace internal 83 } // namespace v8 84 85 #endif // V8_CHAR_PREDICATES_H_ 86