1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_MACRO_ASSEMBLER_H_ 6 #define V8_REGEXP_MACRO_ASSEMBLER_H_ 7 8 #include "src/ast.h" 9 10 namespace v8 { 11 namespace internal { 12 13 struct DisjunctDecisionRow { 14 RegExpCharacterClass cc; 15 Label* on_match; 16 }; 17 18 19 class RegExpMacroAssembler { 20 public: 21 // The implementation must be able to handle at least: 22 static const int kMaxRegister = (1 << 16) - 1; 23 static const int kMaxCPOffset = (1 << 15) - 1; 24 static const int kMinCPOffset = -(1 << 15); 25 26 static const int kTableSizeBits = 7; 27 static const int kTableSize = 1 << kTableSizeBits; 28 static const int kTableMask = kTableSize - 1; 29 30 enum IrregexpImplementation { 31 kIA32Implementation, 32 kARMImplementation, 33 kARM64Implementation, 34 kMIPSImplementation, 35 kX64Implementation, 36 kX87Implementation, 37 kBytecodeImplementation 38 }; 39 40 enum StackCheckFlag { 41 kNoStackLimitCheck = false, 42 kCheckStackLimit = true 43 }; 44 45 explicit RegExpMacroAssembler(Zone* zone); 46 virtual ~RegExpMacroAssembler(); 47 // The maximal number of pushes between stack checks. Users must supply 48 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck) 49 // at least once for every stack_limit() pushes that are executed. 50 virtual int stack_limit_slack() = 0; 51 virtual bool CanReadUnaligned() = 0; 52 virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change. 53 virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by. 54 // Continues execution from the position pushed on the top of the backtrack 55 // stack by an earlier PushBacktrack(Label*). 56 virtual void Backtrack() = 0; 57 virtual void Bind(Label* label) = 0; 58 virtual void CheckAtStart(Label* on_at_start) = 0; 59 // Dispatch after looking the current character up in a 2-bits-per-entry 60 // map. The destinations vector has up to 4 labels. 61 virtual void CheckCharacter(unsigned c, Label* on_equal) = 0; 62 // Bitwise and the current character with the given constant and then 63 // check for a match with c. 64 virtual void CheckCharacterAfterAnd(unsigned c, 65 unsigned and_with, 66 Label* on_equal) = 0; 67 virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0; 68 virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0; 69 virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0; 70 virtual void CheckNotAtStart(Label* on_not_at_start) = 0; 71 virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0; 72 virtual void CheckNotBackReferenceIgnoreCase(int start_reg, 73 Label* on_no_match) = 0; 74 // Check the current character for a match with a literal character. If we 75 // fail to match then goto the on_failure label. End of input always 76 // matches. If the label is NULL then we should pop a backtrack address off 77 // the stack and go to that. 78 virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0; 79 virtual void CheckNotCharacterAfterAnd(unsigned c, 80 unsigned and_with, 81 Label* on_not_equal) = 0; 82 // Subtract a constant from the current character, then and with the given 83 // constant and then check for a match with c. 84 virtual void CheckNotCharacterAfterMinusAnd(uc16 c, 85 uc16 minus, 86 uc16 and_with, 87 Label* on_not_equal) = 0; 88 virtual void CheckCharacterInRange(uc16 from, 89 uc16 to, // Both inclusive. 90 Label* on_in_range) = 0; 91 virtual void CheckCharacterNotInRange(uc16 from, 92 uc16 to, // Both inclusive. 93 Label* on_not_in_range) = 0; 94 95 // The current character (modulus the kTableSize) is looked up in the byte 96 // array, and if the found byte is non-zero, we jump to the on_bit_set label. 97 virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0; 98 99 // Checks whether the given offset from the current position is before 100 // the end of the string. May overwrite the current character. 101 virtual void CheckPosition(int cp_offset, Label* on_outside_input) { 102 LoadCurrentCharacter(cp_offset, on_outside_input, true); 103 } 104 // Check whether a standard/default character class matches the current 105 // character. Returns false if the type of special character class does 106 // not have custom support. 107 // May clobber the current loaded character. 108 virtual bool CheckSpecialCharacterClass(uc16 type, 109 Label* on_no_match) { 110 return false; 111 } 112 virtual void Fail() = 0; 113 virtual Handle<HeapObject> GetCode(Handle<String> source) = 0; 114 virtual void GoTo(Label* label) = 0; 115 // Check whether a register is >= a given constant and go to a label if it 116 // is. Backtracks instead if the label is NULL. 117 virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0; 118 // Check whether a register is < a given constant and go to a label if it is. 119 // Backtracks instead if the label is NULL. 120 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0; 121 // Check whether a register is == to the current position and go to a 122 // label if it is. 123 virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0; 124 virtual IrregexpImplementation Implementation() = 0; 125 virtual void LoadCurrentCharacter(int cp_offset, 126 Label* on_end_of_input, 127 bool check_bounds = true, 128 int characters = 1) = 0; 129 virtual void PopCurrentPosition() = 0; 130 virtual void PopRegister(int register_index) = 0; 131 // Pushes the label on the backtrack stack, so that a following Backtrack 132 // will go to this label. Always checks the backtrack stack limit. 133 virtual void PushBacktrack(Label* label) = 0; 134 virtual void PushCurrentPosition() = 0; 135 virtual void PushRegister(int register_index, 136 StackCheckFlag check_stack_limit) = 0; 137 virtual void ReadCurrentPositionFromRegister(int reg) = 0; 138 virtual void ReadStackPointerFromRegister(int reg) = 0; 139 virtual void SetCurrentPositionFromEnd(int by) = 0; 140 virtual void SetRegister(int register_index, int to) = 0; 141 // Return whether the matching (with a global regexp) will be restarted. 142 virtual bool Succeed() = 0; 143 virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0; 144 virtual void ClearRegisters(int reg_from, int reg_to) = 0; 145 virtual void WriteStackPointerToRegister(int reg) = 0; 146 147 // Controls the generation of large inlined constants in the code. 148 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; } 149 bool slow_safe() { return slow_safe_compiler_; } 150 151 enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK }; 152 // Set whether the regular expression has the global flag. Exiting due to 153 // a failure in a global regexp may still mean success overall. 154 inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; } 155 inline bool global() { return global_mode_ != NOT_GLOBAL; } 156 inline bool global_with_zero_length_check() { 157 return global_mode_ == GLOBAL; 158 } 159 160 Zone* zone() const { return zone_; } 161 162 private: 163 bool slow_safe_compiler_; 164 bool global_mode_; 165 Zone* zone_; 166 }; 167 168 169 #ifndef V8_INTERPRETED_REGEXP // Avoid compiling unused code. 170 171 class NativeRegExpMacroAssembler: public RegExpMacroAssembler { 172 public: 173 // Type of input string to generate code for. 174 enum Mode { LATIN1 = 1, UC16 = 2 }; 175 176 // Result of calling generated native RegExp code. 177 // RETRY: Something significant changed during execution, and the matching 178 // should be retried from scratch. 179 // EXCEPTION: Something failed during execution. If no exception has been 180 // thrown, it's an internal out-of-memory, and the caller should 181 // throw the exception. 182 // FAILURE: Matching failed. 183 // SUCCESS: Matching succeeded, and the output array has been filled with 184 // capture positions. 185 enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 }; 186 187 explicit NativeRegExpMacroAssembler(Zone* zone); 188 virtual ~NativeRegExpMacroAssembler(); 189 virtual bool CanReadUnaligned(); 190 191 static Result Match(Handle<Code> regexp, 192 Handle<String> subject, 193 int* offsets_vector, 194 int offsets_vector_length, 195 int previous_index, 196 Isolate* isolate); 197 198 // Compares two-byte strings case insensitively. 199 // Called from generated RegExp code. 200 static int CaseInsensitiveCompareUC16(Address byte_offset1, 201 Address byte_offset2, 202 size_t byte_length, 203 Isolate* isolate); 204 205 // Called from RegExp if the backtrack stack limit is hit. 206 // Tries to expand the stack. Returns the new stack-pointer if 207 // successful, and updates the stack_top address, or returns 0 if unable 208 // to grow the stack. 209 // This function must not trigger a garbage collection. 210 static Address GrowStack(Address stack_pointer, Address* stack_top, 211 Isolate* isolate); 212 213 static const byte* StringCharacterPosition(String* subject, int start_index); 214 215 // Byte map of one byte characters with a 0xff if the character is a word 216 // character (digit, letter or underscore) and 0x00 otherwise. 217 // Used by generated RegExp code. 218 static const byte word_character_map[256]; 219 220 static Address word_character_map_address() { 221 return const_cast<Address>(&word_character_map[0]); 222 } 223 224 static Result Execute(Code* code, 225 String* input, 226 int start_offset, 227 const byte* input_start, 228 const byte* input_end, 229 int* output, 230 int output_size, 231 Isolate* isolate); 232 }; 233 234 #endif // V8_INTERPRETED_REGEXP 235 236 } } // namespace v8::internal 237 238 #endif // V8_REGEXP_MACRO_ASSEMBLER_H_ 239