1 /********************************************************************** 2 regsyntax.c - Oniguruma (regular expression library) 3 **********************************************************************/ 4 /*- 5 * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include "regint.h" 31 32 OnigSyntaxType OnigSyntaxASIS = { 33 0 34 , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE 35 , 0 36 , ONIG_OPTION_NONE 37 , 38 { 39 (OnigCodePoint )'\\' /* esc */ 40 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 41 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 42 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 43 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 44 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 45 } 46 }; 47 48 OnigSyntaxType OnigSyntaxPosixBasic = { 49 ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | 50 ONIG_SYN_OP_ESC_BRACE_INTERVAL ) 51 , 0 52 , 0 53 , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) 54 , 55 { 56 (OnigCodePoint )'\\' /* esc */ 57 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 58 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 59 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 60 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 61 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 62 } 63 }; 64 65 OnigSyntaxType OnigSyntaxPosixExtended = { 66 ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | 67 ONIG_SYN_OP_BRACE_INTERVAL | 68 ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) 69 , 0 70 , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | 71 ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | 72 ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | 73 ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) 74 , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) 75 , 76 { 77 (OnigCodePoint )'\\' /* esc */ 78 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 79 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 80 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 81 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 82 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 83 } 84 }; 85 86 OnigSyntaxType OnigSyntaxEmacs = { 87 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | 88 ONIG_SYN_OP_ESC_BRACE_INTERVAL | 89 ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | 90 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | 91 ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | 92 ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) 93 , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR 94 , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC 95 , ONIG_OPTION_NONE 96 , 97 { 98 (OnigCodePoint )'\\' /* esc */ 99 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 100 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 101 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 102 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 103 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 104 } 105 }; 106 107 OnigSyntaxType OnigSyntaxGrep = { 108 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | 109 ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | 110 ONIG_SYN_OP_ESC_VBAR_ALT | 111 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | 112 ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | 113 ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | 114 ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) 115 , 0 116 , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) 117 , ONIG_OPTION_NONE 118 , 119 { 120 (OnigCodePoint )'\\' /* esc */ 121 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 122 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 123 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 124 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 125 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 126 } 127 }; 128 129 OnigSyntaxType OnigSyntaxGnuRegex = { 130 SYN_GNU_REGEX_OP 131 , 0 132 , SYN_GNU_REGEX_BV 133 , ONIG_OPTION_NONE 134 , 135 { 136 (OnigCodePoint )'\\' /* esc */ 137 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 138 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 139 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 140 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 141 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 142 } 143 }; 144 145 OnigSyntaxType OnigSyntaxJava = { 146 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | 147 ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | 148 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) 149 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) 150 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | 151 ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | 152 ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | 153 ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | 154 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) 155 , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) 156 , ONIG_OPTION_SINGLELINE 157 , 158 { 159 (OnigCodePoint )'\\' /* esc */ 160 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 161 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 162 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 163 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 164 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 165 } 166 }; 167 168 OnigSyntaxType OnigSyntaxPerl = { 169 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | 170 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | 171 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | 172 ONIG_SYN_OP_ESC_C_CONTROL ) 173 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) 174 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | 175 ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | 176 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | 177 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) 178 , SYN_GNU_REGEX_BV 179 , ONIG_OPTION_SINGLELINE 180 , 181 { 182 (OnigCodePoint )'\\' /* esc */ 183 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 184 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 185 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 186 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 187 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 188 } 189 }; 190 191 /* Perl + named group */ 192 OnigSyntaxType OnigSyntaxPerl_NG = { 193 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | 194 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | 195 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | 196 ONIG_SYN_OP_ESC_C_CONTROL ) 197 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) 198 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | 199 ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | 200 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | 201 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | 202 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | 203 ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | 204 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) 205 , ( SYN_GNU_REGEX_BV | 206 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | 207 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) 208 , ONIG_OPTION_SINGLELINE 209 , 210 { 211 (OnigCodePoint )'\\' /* esc */ 212 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 213 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 214 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 215 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 216 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 217 } 218 }; 219 220 221 222 extern int 223 onig_set_default_syntax(OnigSyntaxType* syntax) 224 { 225 if (IS_NULL(syntax)) 226 syntax = ONIG_SYNTAX_RUBY; 227 228 OnigDefaultSyntax = syntax; 229 return 0; 230 } 231 232 extern void 233 onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) 234 { 235 *to = *from; 236 } 237 238 extern void 239 onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) 240 { 241 syntax->op = op; 242 } 243 244 extern void 245 onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) 246 { 247 syntax->op2 = op2; 248 } 249 250 extern void 251 onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) 252 { 253 syntax->behavior = behavior; 254 } 255 256 extern void 257 onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) 258 { 259 syntax->options = options; 260 } 261 262 extern unsigned int 263 onig_get_syntax_op(OnigSyntaxType* syntax) 264 { 265 return syntax->op; 266 } 267 268 extern unsigned int 269 onig_get_syntax_op2(OnigSyntaxType* syntax) 270 { 271 return syntax->op2; 272 } 273 274 extern unsigned int 275 onig_get_syntax_behavior(OnigSyntaxType* syntax) 276 { 277 return syntax->behavior; 278 } 279 280 extern OnigOptionType 281 onig_get_syntax_options(OnigSyntaxType* syntax) 282 { 283 return syntax->options; 284 } 285 286 #ifdef USE_VARIABLE_META_CHARS 287 extern int onig_set_meta_char(OnigSyntaxType* enc, 288 unsigned int what, OnigCodePoint code) 289 { 290 switch (what) { 291 case ONIG_META_CHAR_ESCAPE: 292 enc->meta_char_table.esc = code; 293 break; 294 case ONIG_META_CHAR_ANYCHAR: 295 enc->meta_char_table.anychar = code; 296 break; 297 case ONIG_META_CHAR_ANYTIME: 298 enc->meta_char_table.anytime = code; 299 break; 300 case ONIG_META_CHAR_ZERO_OR_ONE_TIME: 301 enc->meta_char_table.zero_or_one_time = code; 302 break; 303 case ONIG_META_CHAR_ONE_OR_MORE_TIME: 304 enc->meta_char_table.one_or_more_time = code; 305 break; 306 case ONIG_META_CHAR_ANYCHAR_ANYTIME: 307 enc->meta_char_table.anychar_anytime = code; 308 break; 309 default: 310 return ONIGERR_INVALID_ARGUMENT; 311 break; 312 } 313 return 0; 314 } 315 #endif /* USE_VARIABLE_META_CHARS */ 316