Home | History | Annotate | Download | only in Oniguruma
      1 /**********************************************************************
      2   regsyntax.c -  Oniguruma (regular expression library)
      3 **********************************************************************/
      4 /*-
      5  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
      6  * All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 #include "regint.h"
     31 
     32 OnigSyntaxType OnigSyntaxASIS = {
     33     0
     34   , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
     35   , 0
     36   , ONIG_OPTION_NONE
     37   ,
     38   {
     39       (OnigCodePoint )'\\'                       /* esc */
     40     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
     41     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
     42     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
     43     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
     44     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
     45   }
     46 };
     47 
     48 OnigSyntaxType OnigSyntaxPosixBasic = {
     49   ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
     50     ONIG_SYN_OP_ESC_BRACE_INTERVAL )
     51   , 0
     52   , 0
     53   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
     54   ,
     55   {
     56       (OnigCodePoint )'\\'                       /* esc */
     57     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
     58     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
     59     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
     60     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
     61     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
     62   }
     63 };
     64 
     65 OnigSyntaxType OnigSyntaxPosixExtended = {
     66   ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
     67     ONIG_SYN_OP_BRACE_INTERVAL |
     68     ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
     69   , 0
     70   , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
     71       ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
     72       ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
     73       ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
     74   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
     75   ,
     76   {
     77       (OnigCodePoint )'\\'                       /* esc */
     78     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
     79     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
     80     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
     81     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
     82     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
     83   }
     84 };
     85 
     86 OnigSyntaxType OnigSyntaxEmacs = {
     87   ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
     88     ONIG_SYN_OP_ESC_BRACE_INTERVAL |
     89     ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
     90     ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
     91     ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
     92     ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
     93   , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
     94   , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
     95   , ONIG_OPTION_NONE
     96   ,
     97   {
     98       (OnigCodePoint )'\\'                       /* esc */
     99     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    100     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    101     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    102     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    103     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
    104   }
    105 };
    106 
    107 OnigSyntaxType OnigSyntaxGrep = {
    108   ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
    109     ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
    110     ONIG_SYN_OP_ESC_VBAR_ALT |
    111     ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
    112     ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
    113     ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
    114     ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
    115   , 0
    116   , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
    117   , ONIG_OPTION_NONE
    118   ,
    119   {
    120       (OnigCodePoint )'\\'                       /* esc */
    121     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    122     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    123     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    124     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    125     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
    126   }
    127 };
    128 
    129 OnigSyntaxType OnigSyntaxGnuRegex = {
    130   SYN_GNU_REGEX_OP
    131   , 0
    132   , SYN_GNU_REGEX_BV
    133   , ONIG_OPTION_NONE
    134   ,
    135   {
    136       (OnigCodePoint )'\\'                       /* esc */
    137     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    138     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    139     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    140     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    141     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
    142   }
    143 };
    144 
    145 OnigSyntaxType OnigSyntaxJava = {
    146   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
    147      ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
    148      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
    149    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
    150   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
    151       ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
    152       ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
    153       ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
    154       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
    155   , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
    156   , ONIG_OPTION_SINGLELINE
    157   ,
    158   {
    159       (OnigCodePoint )'\\'                       /* esc */
    160     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    161     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    162     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    163     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    164     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
    165   }
    166 };
    167 
    168 OnigSyntaxType OnigSyntaxPerl = {
    169   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
    170      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
    171      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
    172      ONIG_SYN_OP_ESC_C_CONTROL )
    173    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
    174   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
    175       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
    176       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
    177       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
    178   , SYN_GNU_REGEX_BV
    179   , ONIG_OPTION_SINGLELINE
    180   ,
    181   {
    182       (OnigCodePoint )'\\'                       /* esc */
    183     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    184     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    185     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    186     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    187     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
    188   }
    189 };
    190 
    191 /* Perl + named group */
    192 OnigSyntaxType OnigSyntaxPerl_NG = {
    193   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
    194      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
    195      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
    196      ONIG_SYN_OP_ESC_C_CONTROL )
    197    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
    198   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
    199       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
    200       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
    201       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
    202       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       |
    203       ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        |
    204       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
    205   , ( SYN_GNU_REGEX_BV |
    206       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
    207       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
    208   , ONIG_OPTION_SINGLELINE
    209   ,
    210   {
    211       (OnigCodePoint )'\\'                       /* esc */
    212     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    213     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    214     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    215     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    216     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
    217   }
    218 };
    219 
    220 
    221 
    222 extern int
    223 onig_set_default_syntax(OnigSyntaxType* syntax)
    224 {
    225   if (IS_NULL(syntax))
    226     syntax = ONIG_SYNTAX_RUBY;
    227 
    228   OnigDefaultSyntax = syntax;
    229   return 0;
    230 }
    231 
    232 extern void
    233 onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
    234 {
    235   *to = *from;
    236 }
    237 
    238 extern void
    239 onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
    240 {
    241   syntax->op = op;
    242 }
    243 
    244 extern void
    245 onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
    246 {
    247   syntax->op2 = op2;
    248 }
    249 
    250 extern void
    251 onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
    252 {
    253   syntax->behavior = behavior;
    254 }
    255 
    256 extern void
    257 onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
    258 {
    259   syntax->options = options;
    260 }
    261 
    262 extern unsigned int
    263 onig_get_syntax_op(OnigSyntaxType* syntax)
    264 {
    265   return syntax->op;
    266 }
    267 
    268 extern unsigned int
    269 onig_get_syntax_op2(OnigSyntaxType* syntax)
    270 {
    271   return syntax->op2;
    272 }
    273 
    274 extern unsigned int
    275 onig_get_syntax_behavior(OnigSyntaxType* syntax)
    276 {
    277   return syntax->behavior;
    278 }
    279 
    280 extern OnigOptionType
    281 onig_get_syntax_options(OnigSyntaxType* syntax)
    282 {
    283   return syntax->options;
    284 }
    285 
    286 #ifdef USE_VARIABLE_META_CHARS
    287 extern int onig_set_meta_char(OnigSyntaxType* enc,
    288                               unsigned int what, OnigCodePoint code)
    289 {
    290   switch (what) {
    291   case ONIG_META_CHAR_ESCAPE:
    292     enc->meta_char_table.esc = code;
    293     break;
    294   case ONIG_META_CHAR_ANYCHAR:
    295     enc->meta_char_table.anychar = code;
    296     break;
    297   case ONIG_META_CHAR_ANYTIME:
    298     enc->meta_char_table.anytime = code;
    299     break;
    300   case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
    301     enc->meta_char_table.zero_or_one_time = code;
    302     break;
    303   case ONIG_META_CHAR_ONE_OR_MORE_TIME:
    304     enc->meta_char_table.one_or_more_time = code;
    305     break;
    306   case ONIG_META_CHAR_ANYCHAR_ANYTIME:
    307     enc->meta_char_table.anychar_anytime = code;
    308     break;
    309   default:
    310     return ONIGERR_INVALID_ARGUMENT;
    311     break;
    312   }
    313   return 0;
    314 }
    315 #endif /* USE_VARIABLE_META_CHARS */
    316