Home | History | Annotate | Download | only in objects
      1 // Copyright 2017 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_OBJECTS_JS_REGEXP_H_
      6 #define V8_OBJECTS_JS_REGEXP_H_
      7 
      8 #include "src/objects/js-array.h"
      9 
     10 // Has to be the last include (doesn't have include guards):
     11 #include "src/objects/object-macros.h"
     12 
     13 namespace v8 {
     14 namespace internal {
     15 
     16 // Regular expressions
     17 // The regular expression holds a single reference to a FixedArray in
     18 // the kDataOffset field.
     19 // The FixedArray contains the following data:
     20 // - tag : type of regexp implementation (not compiled yet, atom or irregexp)
     21 // - reference to the original source string
     22 // - reference to the original flag string
     23 // If it is an atom regexp
     24 // - a reference to a literal string to search for
     25 // If it is an irregexp regexp:
     26 // - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
     27 // used for tracking the last usage (used for regexp code flushing).
     28 // - a reference to code for UC16 inputs (bytecode or compiled), or a smi
     29 // used for tracking the last usage (used for regexp code flushing).
     30 // - max number of registers used by irregexp implementations.
     31 // - number of capture registers (output values) of the regexp.
     32 class JSRegExp : public JSObject {
     33  public:
     34   // Meaning of Type:
     35   // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
     36   // ATOM: A simple string to match against using an indexOf operation.
     37   // IRREGEXP: Compiled with Irregexp.
     38   enum Type { NOT_COMPILED, ATOM, IRREGEXP };
     39   enum Flag {
     40     kNone = 0,
     41     kGlobal = 1 << 0,
     42     kIgnoreCase = 1 << 1,
     43     kMultiline = 1 << 2,
     44     kSticky = 1 << 3,
     45     kUnicode = 1 << 4,
     46     kDotAll = 1 << 5,
     47     // Update FlagCount when adding new flags.
     48   };
     49   typedef base::Flags<Flag> Flags;
     50 
     51   static constexpr int FlagCount() { return 6; }
     52 
     53   DECL_ACCESSORS(data, Object)
     54   DECL_ACCESSORS(flags, Object)
     55   DECL_ACCESSORS(last_index, Object)
     56   DECL_ACCESSORS(source, Object)
     57 
     58   V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(Isolate* isolate,
     59                                                      Handle<String> source,
     60                                                      Flags flags);
     61   static Handle<JSRegExp> Copy(Handle<JSRegExp> regexp);
     62 
     63   static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
     64                                           Handle<String> source, Flags flags);
     65   static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
     66                                           Handle<String> source,
     67                                           Handle<String> flags_string);
     68 
     69   inline Type TypeTag();
     70   // Number of captures (without the match itself).
     71   inline int CaptureCount();
     72   inline Flags GetFlags();
     73   inline String* Pattern();
     74   inline Object* CaptureNameMap();
     75   inline Object* DataAt(int index);
     76   // Set implementation data after the object has been prepared.
     77   inline void SetDataAt(int index, Object* value);
     78 
     79   static int code_index(bool is_latin1) {
     80     if (is_latin1) {
     81       return kIrregexpLatin1CodeIndex;
     82     } else {
     83       return kIrregexpUC16CodeIndex;
     84     }
     85   }
     86 
     87   DECL_CAST(JSRegExp)
     88 
     89   // Dispatched behavior.
     90   DECL_PRINTER(JSRegExp)
     91   DECL_VERIFIER(JSRegExp)
     92 
     93   static const int kDataOffset = JSObject::kHeaderSize;
     94   static const int kSourceOffset = kDataOffset + kPointerSize;
     95   static const int kFlagsOffset = kSourceOffset + kPointerSize;
     96   static const int kSize = kFlagsOffset + kPointerSize;
     97   static const int kLastIndexOffset = kSize;  // In-object field.
     98 
     99   // Indices in the data array.
    100   static const int kTagIndex = 0;
    101   static const int kSourceIndex = kTagIndex + 1;
    102   static const int kFlagsIndex = kSourceIndex + 1;
    103   static const int kDataIndex = kFlagsIndex + 1;
    104   // The data fields are used in different ways depending on the
    105   // value of the tag.
    106   // Atom regexps (literal strings).
    107   static const int kAtomPatternIndex = kDataIndex;
    108 
    109   static const int kAtomDataSize = kAtomPatternIndex + 1;
    110 
    111   // Irregexp compiled code or bytecode for Latin1. If compilation
    112   // fails, this fields hold an exception object that should be
    113   // thrown if the regexp is used again.
    114   static const int kIrregexpLatin1CodeIndex = kDataIndex;
    115   // Irregexp compiled code or bytecode for UC16.  If compilation
    116   // fails, this fields hold an exception object that should be
    117   // thrown if the regexp is used again.
    118   static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
    119   // Maximal number of registers used by either Latin1 or UC16.
    120   // Only used to check that there is enough stack space
    121   static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2;
    122   // Number of captures in the compiled regexp.
    123   static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
    124   // Maps names of named capture groups (at indices 2i) to their corresponding
    125   // (1-based) capture group indices (at indices 2i + 1).
    126   static const int kIrregexpCaptureNameMapIndex = kDataIndex + 4;
    127 
    128   static const int kIrregexpDataSize = kIrregexpCaptureNameMapIndex + 1;
    129 
    130   // In-object fields.
    131   static const int kLastIndexFieldIndex = 0;
    132   static const int kInObjectFieldCount = 1;
    133 
    134   // The uninitialized value for a regexp code object.
    135   static const int kUninitializedValue = -1;
    136 };
    137 
    138 DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)
    139 
    140 // JSRegExpResult is just a JSArray with a specific initial map.
    141 // This initial map adds in-object properties for "index" and "input"
    142 // properties, as assigned by RegExp.prototype.exec, which allows
    143 // faster creation of RegExp exec results.
    144 // This class just holds constants used when creating the result.
    145 // After creation the result must be treated as a JSArray in all regards.
    146 class JSRegExpResult : public JSArray {
    147  public:
    148 #define REG_EXP_RESULT_FIELDS(V) \
    149   V(kIndexOffset, kPointerSize)  \
    150   V(kInputOffset, kPointerSize)  \
    151   V(kGroupsOffset, kPointerSize) \
    152   V(kSize, 0)
    153 
    154   DEFINE_FIELD_OFFSET_CONSTANTS(JSArray::kSize, REG_EXP_RESULT_FIELDS)
    155 #undef REG_EXP_RESULT_FIELDS
    156 
    157   // Indices of in-object properties.
    158   static const int kIndexIndex = 0;
    159   static const int kInputIndex = 1;
    160   static const int kGroupsIndex = 2;
    161   static const int kInObjectPropertyCount = 3;
    162 
    163  private:
    164   DISALLOW_IMPLICIT_CONSTRUCTORS(JSRegExpResult);
    165 };
    166 
    167 }  // namespace internal
    168 }  // namespace v8
    169 
    170 #include "src/objects/object-macros-undef.h"
    171 
    172 #endif  // V8_OBJECTS_JS_REGEXP_H_
    173