Home | History | Annotate | Download | only in Oniguruma
      1 /**********************************************************************
      2   regparse.c -  Oniguruma (regular expression library)
      3 **********************************************************************/
      4 /*-
      5  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
      6  * All rights reserved.
      7  *
      8  * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include "regparse.h"
     33 #include "st.h"
     34 
     35 #define WARN_BUFSIZE    256
     36 
     37 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
     38 
     39 
     40 OnigSyntaxType OnigSyntaxRuby = {
     41   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     42      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     43      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
     44      ONIG_SYN_OP_ESC_C_CONTROL )
     45    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
     46   , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
     47       ONIG_SYN_OP2_OPTION_RUBY |
     48       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
     49       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
     50       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
     51       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
     52       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
     53       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
     54       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
     55       ONIG_SYN_OP2_ESC_H_XDIGIT )
     56   , ( SYN_GNU_REGEX_BV |
     57       ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
     58       ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
     59       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
     60       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
     61       ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
     62       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
     63       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
     64   , ONIG_OPTION_NONE
     65   ,
     66   {
     67       (OnigCodePoint )'\\'                       /* esc */
     68     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
     69     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
     70     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
     71     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
     72     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
     73   }
     74 };
     75 
     76 OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
     77 
     78 extern void onig_null_warn(const char* s ARG_UNUSED) { }
     79 
     80 #ifdef DEFAULT_WARN_FUNCTION
     81 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
     82 #else
     83 static OnigWarnFunc onig_warn = onig_null_warn;
     84 #endif
     85 
     86 #ifdef DEFAULT_VERB_WARN_FUNCTION
     87 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
     88 #else
     89 static OnigWarnFunc onig_verb_warn = onig_null_warn;
     90 #endif
     91 
     92 extern void onig_set_warn_func(OnigWarnFunc f)
     93 {
     94   onig_warn = f;
     95 }
     96 
     97 extern void onig_set_verb_warn_func(OnigWarnFunc f)
     98 {
     99   onig_verb_warn = f;
    100 }
    101 
    102 static void
    103 bbuf_free(BBuf* bbuf)
    104 {
    105   if (IS_NOT_NULL(bbuf)) {
    106     if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
    107     xfree(bbuf);
    108   }
    109 }
    110 
    111 static int
    112 bbuf_clone(BBuf** rto, BBuf* from)
    113 {
    114   int r;
    115   BBuf *to;
    116 
    117   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
    118   CHECK_NULL_RETURN_MEMERR(to);
    119   r = BBUF_INIT(to, from->alloc);
    120   if (r != 0) return r;
    121   to->used = from->used;
    122   xmemcpy(to->p, from->p, from->used);
    123   return 0;
    124 }
    125 
    126 #define BACKREF_REL_TO_ABS(rel_no, env) \
    127   ((env)->num_mem + 1 + (rel_no))
    128 
    129 #define ONOFF(v,f,negative)    (negative) ? ((v) &= ~(f)) : ((v) |= (f))
    130 
    131 #define MBCODE_START_POS(enc) \
    132   (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
    133 
    134 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
    135   add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
    136 
    137 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
    138   if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    139     r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    140     if (r) return r;\
    141   }\
    142 } while (0)
    143 
    144 
    145 #define BITSET_IS_EMPTY(bs,empty) do {\
    146   int i;\
    147   empty = 1;\
    148   for (i = 0; i < (int )BITSET_SIZE; i++) {\
    149     if ((bs)[i] != 0) {\
    150       empty = 0; break;\
    151     }\
    152   }\
    153 } while (0)
    154 
    155 static void
    156 bitset_set_range(BitSetRef bs, int from, int to)
    157 {
    158   int i;
    159   for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
    160     BITSET_SET_BIT(bs, i);
    161   }
    162 }
    163 
    164 #if 0
    165 static void
    166 bitset_set_all(BitSetRef bs)
    167 {
    168   int i;
    169   for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
    170 }
    171 #endif
    172 
    173 static void
    174 bitset_invert(BitSetRef bs)
    175 {
    176   int i;
    177   for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
    178 }
    179 
    180 static void
    181 bitset_invert_to(BitSetRef from, BitSetRef to)
    182 {
    183   int i;
    184   for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
    185 }
    186 
    187 static void
    188 bitset_and(BitSetRef dest, BitSetRef bs)
    189 {
    190   int i;
    191   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
    192 }
    193 
    194 static void
    195 bitset_or(BitSetRef dest, BitSetRef bs)
    196 {
    197   int i;
    198   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
    199 }
    200 
    201 static void
    202 bitset_copy(BitSetRef dest, BitSetRef bs)
    203 {
    204   int i;
    205   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
    206 }
    207 
    208 extern int
    209 onig_strncmp(const UChar* s1, const UChar* s2, int n)
    210 {
    211   int x;
    212 
    213   while (n-- > 0) {
    214     x = *s2++ - *s1++;
    215     if (x) return x;
    216   }
    217   return 0;
    218 }
    219 
    220 extern void
    221 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
    222 {
    223   int len = (int)(end - src);
    224   if (len > 0) {
    225     xmemcpy(dest, src, len);
    226     dest[len] = (UChar )0;
    227   }
    228 }
    229 
    230 #ifdef USE_NAMED_GROUP
    231 static UChar*
    232 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
    233 {
    234   int slen, term_len, i;
    235   UChar *r;
    236 
    237   slen = (int)(end - s);
    238   term_len = ONIGENC_MBC_MINLEN(enc);
    239 
    240   r = (UChar* )xmalloc(slen + term_len);
    241   CHECK_NULL_RETURN(r);
    242   xmemcpy(r, s, slen);
    243 
    244   for (i = 0; i < term_len; i++)
    245     r[slen + i] = (UChar )0;
    246 
    247   return r;
    248 }
    249 #endif
    250 
    251 /* scan pattern methods */
    252 #define PEND_VALUE   0
    253 
    254 #define PFETCH_READY  UChar* pfetch_prev
    255 #define PEND         (p < end ?  0 : 1)
    256 #define PUNFETCH     p = pfetch_prev
    257 #define PINC       do { \
    258   pfetch_prev = p; \
    259   p += ONIGENC_MBC_ENC_LEN(enc, p); \
    260 } while (0)
    261 #define PFETCH(c)  do { \
    262   c = ONIGENC_MBC_TO_CODE(enc, p, end); \
    263   pfetch_prev = p; \
    264   p += ONIGENC_MBC_ENC_LEN(enc, p); \
    265 } while (0)
    266 
    267 #define PINC_S     do { \
    268   p += ONIGENC_MBC_ENC_LEN(enc, p); \
    269 } while (0)
    270 #define PFETCH_S(c) do { \
    271   c = ONIGENC_MBC_TO_CODE(enc, p, end); \
    272   p += ONIGENC_MBC_ENC_LEN(enc, p); \
    273 } while (0)
    274 
    275 #define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
    276 #define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
    277 
    278 static UChar*
    279 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
    280 	      int capa, int oldCapa)
    281 {
    282   UChar* r;
    283 
    284   if (dest)
    285     r = (UChar* )xrealloc(dest, capa + 1, oldCapa);
    286   else
    287     r = (UChar* )xmalloc(capa + 1);
    288 
    289   CHECK_NULL_RETURN(r);
    290   onig_strcpy(r + (dest_end - dest), src, src_end);
    291   return r;
    292 }
    293 
    294 /* dest on static area */
    295 static UChar*
    296 strcat_capa_from_static(UChar* dest, UChar* dest_end,
    297 			const UChar* src, const UChar* src_end, int capa)
    298 {
    299   UChar* r;
    300 
    301   r = (UChar* )xmalloc(capa + 1);
    302   CHECK_NULL_RETURN(r);
    303   onig_strcpy(r, dest, dest_end);
    304   onig_strcpy(r + (dest_end - dest), src, src_end);
    305   return r;
    306 }
    307 
    308 
    309 #ifdef USE_ST_LIBRARY
    310 
    311 typedef struct {
    312   UChar* s;
    313   UChar* end;
    314 } st_str_end_key;
    315 
    316 static int
    317 str_end_cmp(st_str_end_key* x, st_str_end_key* y)
    318 {
    319   UChar *p, *q;
    320   int c;
    321 
    322   if ((x->end - x->s) != (y->end - y->s))
    323     return 1;
    324 
    325   p = x->s;
    326   q = y->s;
    327   while (p < x->end) {
    328     c = (int )*p - (int )*q;
    329     if (c != 0) return c;
    330 
    331     p++; q++;
    332   }
    333 
    334   return 0;
    335 }
    336 
    337 static int
    338 str_end_hash(st_str_end_key* x)
    339 {
    340   UChar *p;
    341   int val = 0;
    342 
    343   p = x->s;
    344   while (p < x->end) {
    345     val = val * 997 + (int )*p++;
    346   }
    347 
    348   return val + (val >> 5);
    349 }
    350 
    351 extern hash_table_type*
    352 onig_st_init_strend_table_with_size(int size)
    353 {
    354   static struct st_hash_type hashType = {
    355     str_end_cmp,
    356     str_end_hash,
    357   };
    358 
    359   return (hash_table_type* )
    360            onig_st_init_table_with_size(&hashType, size);
    361 }
    362 
    363 extern int
    364 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
    365 		      const UChar* end_key, hash_data_type *value)
    366 {
    367   st_str_end_key key;
    368 
    369   key.s   = (UChar* )str_key;
    370   key.end = (UChar* )end_key;
    371 
    372   return onig_st_lookup(table, (st_data_t )(UINTN)(&key), value);
    373 }
    374 
    375 extern int
    376 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
    377 		      const UChar* end_key, hash_data_type value)
    378 {
    379   st_str_end_key* key;
    380   int result;
    381 
    382   key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
    383   CHECK_NULL_RETURN_MEMERR(key);
    384   key->s   = (UChar* )str_key;
    385   key->end = (UChar* )end_key;
    386   result = onig_st_insert(table, (st_data_t )(UINTN)key, value);
    387   if (result) {
    388     xfree(key);
    389   }
    390   return result;
    391 }
    392 
    393 #endif /* USE_ST_LIBRARY */
    394 
    395 
    396 #ifdef USE_NAMED_GROUP
    397 
    398 #define INIT_NAME_BACKREFS_ALLOC_NUM   8
    399 
    400 typedef struct {
    401   UChar* name;
    402   int    name_len;   /* byte length */
    403   int    back_num;   /* number of backrefs */
    404   int    back_alloc;
    405   int    back_ref1;
    406   int*   back_refs;
    407 } NameEntry;
    408 
    409 #ifdef USE_ST_LIBRARY
    410 
    411 typedef st_table  NameTable;
    412 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
    413 
    414 #define NAMEBUF_SIZE    24
    415 #define NAMEBUF_SIZE_1  25
    416 
    417 #ifdef ONIG_DEBUG
    418 static int
    419 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
    420 {
    421   int i;
    422   FILE* fp = (FILE* )arg;
    423 
    424   fprintf(fp, "%s: ", e->name);
    425   if (e->back_num == 0)
    426     fputs("-", fp);
    427   else if (e->back_num == 1)
    428     fprintf(fp, "%d", e->back_ref1);
    429   else {
    430     for (i = 0; i < e->back_num; i++) {
    431       if (i > 0) fprintf(fp, ", ");
    432       fprintf(fp, "%d", e->back_refs[i]);
    433     }
    434   }
    435   fputs("\n", fp);
    436   return ST_CONTINUE;
    437 }
    438 
    439 extern int
    440 onig_print_names(FILE* fp, regex_t* reg)
    441 {
    442   NameTable* t = (NameTable* )reg->name_table;
    443 
    444   if (IS_NOT_NULL(t)) {
    445     fprintf(fp, "name table\n");
    446     onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
    447     fputs("\n", fp);
    448   }
    449   return 0;
    450 }
    451 #endif /* ONIG_DEBUG */
    452 
    453 static int
    454 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
    455 {
    456   xfree(e->name);
    457   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
    458   xfree(key);
    459   xfree(e);
    460   return ST_DELETE;
    461 }
    462 
    463 static int
    464 names_clear(regex_t* reg)
    465 {
    466   NameTable* t = (NameTable* )reg->name_table;
    467 
    468   if (IS_NOT_NULL(t)) {
    469     onig_st_foreach(t, i_free_name_entry, 0);
    470   }
    471   return 0;
    472 }
    473 
    474 extern int
    475 onig_names_free(regex_t* reg)
    476 {
    477   int r;
    478   NameTable* t;
    479 
    480   r = names_clear(reg);
    481   if (r) return r;
    482 
    483   t = (NameTable* )reg->name_table;
    484   if (IS_NOT_NULL(t)) onig_st_free_table(t);
    485   reg->name_table = (void* )NULL;
    486   return 0;
    487 }
    488 
    489 static NameEntry*
    490 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
    491 {
    492   NameEntry* e;
    493   NameTable* t = (NameTable* )reg->name_table;
    494 
    495   e = (NameEntry* )NULL;
    496   if (IS_NOT_NULL(t)) {
    497     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
    498   }
    499   return e;
    500 }
    501 
    502 typedef struct {
    503   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
    504   regex_t* reg;
    505   void* arg;
    506   int ret;
    507   OnigEncoding enc;
    508 } INamesArg;
    509 
    510 static int
    511 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
    512 {
    513   int r = (*(arg->func))(e->name,
    514                          e->name + e->name_len,
    515                          e->back_num,
    516 			 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
    517 			 arg->reg, arg->arg);
    518   if (r != 0) {
    519     arg->ret = r;
    520     return ST_STOP;
    521   }
    522   return ST_CONTINUE;
    523 }
    524 
    525 extern int
    526 onig_foreach_name(regex_t* reg,
    527   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
    528 {
    529   INamesArg narg;
    530   NameTable* t = (NameTable* )reg->name_table;
    531 
    532   narg.ret = 0;
    533   if (IS_NOT_NULL(t)) {
    534     narg.func = func;
    535     narg.reg  = reg;
    536     narg.arg  = arg;
    537     narg.enc  = reg->enc; /* should be pattern encoding. */
    538     onig_st_foreach(t, i_names, (HashDataType )(UINTN)&narg);
    539   }
    540   return narg.ret;
    541 }
    542 
    543 static int
    544 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
    545 {
    546   int i;
    547 
    548   if (e->back_num > 1) {
    549     for (i = 0; i < e->back_num; i++) {
    550       e->back_refs[i] = map[e->back_refs[i]].new_val;
    551     }
    552   }
    553   else if (e->back_num == 1) {
    554     e->back_ref1 = map[e->back_ref1].new_val;
    555   }
    556 
    557   return ST_CONTINUE;
    558 }
    559 
    560 extern int
    561 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
    562 {
    563   NameTable* t = (NameTable* )reg->name_table;
    564 
    565   if (IS_NOT_NULL(t)) {
    566     onig_st_foreach(t, i_renumber_name, (HashDataType )(UINTN)map);
    567   }
    568   return 0;
    569 }
    570 
    571 
    572 extern int
    573 onig_number_of_names(regex_t* reg)
    574 {
    575   NameTable* t = (NameTable* )reg->name_table;
    576 
    577   if (IS_NOT_NULL(t))
    578     return t->num_entries;
    579   else
    580     return 0;
    581 }
    582 
    583 #else  /* USE_ST_LIBRARY */
    584 
    585 #define INIT_NAMES_ALLOC_NUM    8
    586 
    587 typedef struct {
    588   NameEntry* e;
    589   int        num;
    590   int        alloc;
    591 } NameTable;
    592 
    593 #ifdef ONIG_DEBUG
    594 extern int
    595 onig_print_names(FILE* fp, regex_t* reg)
    596 {
    597   int i, j;
    598   NameEntry* e;
    599   NameTable* t = (NameTable* )reg->name_table;
    600 
    601   if (IS_NOT_NULL(t) && t->num > 0) {
    602     fprintf(fp, "name table\n");
    603     for (i = 0; i < t->num; i++) {
    604       e = &(t->e[i]);
    605       fprintf(fp, "%s: ", e->name);
    606       if (e->back_num == 0) {
    607 	fputs("-", fp);
    608       }
    609       else if (e->back_num == 1) {
    610 	fprintf(fp, "%d", e->back_ref1);
    611       }
    612       else {
    613 	for (j = 0; j < e->back_num; j++) {
    614 	  if (j > 0) fprintf(fp, ", ");
    615 	  fprintf(fp, "%d", e->back_refs[j]);
    616 	}
    617       }
    618       fputs("\n", fp);
    619     }
    620     fputs("\n", fp);
    621   }
    622   return 0;
    623 }
    624 #endif
    625 
    626 static int
    627 names_clear(regex_t* reg)
    628 {
    629   int i;
    630   NameEntry* e;
    631   NameTable* t = (NameTable* )reg->name_table;
    632 
    633   if (IS_NOT_NULL(t)) {
    634     for (i = 0; i < t->num; i++) {
    635       e = &(t->e[i]);
    636       if (IS_NOT_NULL(e->name)) {
    637 	xfree(e->name);
    638 	e->name       = NULL;
    639 	e->name_len   = 0;
    640 	e->back_num   = 0;
    641 	e->back_alloc = 0;
    642 	if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
    643 	e->back_refs = (int* )NULL;
    644       }
    645     }
    646     if (IS_NOT_NULL(t->e)) {
    647       xfree(t->e);
    648       t->e = NULL;
    649     }
    650     t->num = 0;
    651   }
    652   return 0;
    653 }
    654 
    655 extern int
    656 onig_names_free(regex_t* reg)
    657 {
    658   int r;
    659   NameTable* t;
    660 
    661   r = names_clear(reg);
    662   if (r) return r;
    663 
    664   t = (NameTable* )reg->name_table;
    665   if (IS_NOT_NULL(t)) xfree(t);
    666   reg->name_table = NULL;
    667   return 0;
    668 }
    669 
    670 static NameEntry*
    671 name_find(regex_t* reg, UChar* name, UChar* name_end)
    672 {
    673   int i, len;
    674   NameEntry* e;
    675   NameTable* t = (NameTable* )reg->name_table;
    676 
    677   if (IS_NOT_NULL(t)) {
    678     len = name_end - name;
    679     for (i = 0; i < t->num; i++) {
    680       e = &(t->e[i]);
    681       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
    682 	return e;
    683     }
    684   }
    685   return (NameEntry* )NULL;
    686 }
    687 
    688 extern int
    689 onig_foreach_name(regex_t* reg,
    690   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
    691 {
    692   int i, r;
    693   NameEntry* e;
    694   NameTable* t = (NameTable* )reg->name_table;
    695 
    696   if (IS_NOT_NULL(t)) {
    697     for (i = 0; i < t->num; i++) {
    698       e = &(t->e[i]);
    699       r = (*func)(e->name, e->name + e->name_len, e->back_num,
    700 		  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
    701 		  reg, arg);
    702       if (r != 0) return r;
    703     }
    704   }
    705   return 0;
    706 }
    707 
    708 extern int
    709 onig_number_of_names(regex_t* reg)
    710 {
    711   NameTable* t = (NameTable* )reg->name_table;
    712 
    713   if (IS_NOT_NULL(t))
    714     return t->num;
    715   else
    716     return 0;
    717 }
    718 
    719 #endif /* else USE_ST_LIBRARY */
    720 
    721 static int
    722 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
    723 {
    724   int alloc;
    725   NameEntry* e;
    726   NameTable* t = (NameTable* )reg->name_table;
    727 
    728   if (name_end - name <= 0)
    729     return ONIGERR_EMPTY_GROUP_NAME;
    730 
    731   e = name_find(reg, name, name_end);
    732   if (IS_NULL(e)) {
    733 #ifdef USE_ST_LIBRARY
    734     if (IS_NULL(t)) {
    735       t = onig_st_init_strend_table_with_size(5);
    736       CHECK_NULL_RETURN_MEMERR(t);
    737       reg->name_table = (void* )t;
    738     }
    739     e = (NameEntry* )xmalloc(sizeof(NameEntry));
    740     CHECK_NULL_RETURN_MEMERR(e);
    741 
    742     e->name = strdup_with_null(reg->enc, name, name_end);
    743     if (IS_NULL(e->name)) {
    744       xfree(e);  return ONIGERR_MEMORY;
    745     }
    746     onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
    747                           (HashDataType )(UINTN)e);
    748 
    749     e->name_len   = (int)(name_end - name);
    750     e->back_num   = 0;
    751     e->back_alloc = 0;
    752     e->back_refs  = (int* )NULL;
    753 
    754 #else
    755 
    756     if (IS_NULL(t)) {
    757       alloc = INIT_NAMES_ALLOC_NUM;
    758       t = (NameTable* )xmalloc(sizeof(NameTable));
    759       CHECK_NULL_RETURN_MEMERR(t);
    760       t->e     = NULL;
    761       t->alloc = 0;
    762       t->num   = 0;
    763 
    764       t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
    765       if (IS_NULL(t->e)) {
    766 	xfree(t);
    767 	return ONIGERR_MEMORY;
    768       }
    769       t->alloc = alloc;
    770       reg->name_table = t;
    771       goto clear;
    772     }
    773     else if (t->num == t->alloc) {
    774       int i;
    775 
    776       alloc = t->alloc * 2;
    777       t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
    778       CHECK_NULL_RETURN_MEMERR(t->e);
    779       t->alloc = alloc;
    780 
    781     clear:
    782       for (i = t->num; i < t->alloc; i++) {
    783 	t->e[i].name       = NULL;
    784 	t->e[i].name_len   = 0;
    785 	t->e[i].back_num   = 0;
    786 	t->e[i].back_alloc = 0;
    787 	t->e[i].back_refs  = (int* )NULL;
    788       }
    789     }
    790     e = &(t->e[t->num]);
    791     t->num++;
    792     e->name = strdup_with_null(reg->enc, name, name_end);
    793     if (IS_NULL(e->name)) return ONIGERR_MEMORY;
    794     e->name_len = name_end - name;
    795 #endif
    796   }
    797 
    798   if (e->back_num >= 1 &&
    799       ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
    800     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
    801 				    name, name_end);
    802     return ONIGERR_MULTIPLEX_DEFINED_NAME;
    803   }
    804 
    805   e->back_num++;
    806   if (e->back_num == 1) {
    807     e->back_ref1 = backref;
    808   }
    809   else {
    810     if (e->back_num == 2) {
    811       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
    812       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
    813       CHECK_NULL_RETURN_MEMERR(e->back_refs);
    814       e->back_alloc = alloc;
    815       e->back_refs[0] = e->back_ref1;
    816       e->back_refs[1] = backref;
    817     }
    818     else {
    819       if (e->back_num > e->back_alloc) {
    820 	alloc = e->back_alloc * 2;
    821 	e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);
    822 	CHECK_NULL_RETURN_MEMERR(e->back_refs);
    823 	e->back_alloc = alloc;
    824       }
    825       e->back_refs[e->back_num - 1] = backref;
    826     }
    827   }
    828 
    829   return 0;
    830 }
    831 
    832 extern int
    833 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
    834 			   const UChar* name_end, int** nums)
    835 {
    836   NameEntry* e = name_find(reg, name, name_end);
    837 
    838   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
    839 
    840   switch (e->back_num) {
    841   case 0:
    842     break;
    843   case 1:
    844     *nums = &(e->back_ref1);
    845     break;
    846   default:
    847     *nums = e->back_refs;
    848     break;
    849   }
    850   return e->back_num;
    851 }
    852 
    853 extern int
    854 onig_name_to_backref_number(regex_t* reg, const UChar* name,
    855 			    const UChar* name_end, OnigRegion *region)
    856 {
    857   int i, n, *nums;
    858 
    859   n = onig_name_to_group_numbers(reg, name, name_end, &nums);
    860   if (n < 0)
    861     return n;
    862   else if (n == 0)
    863     return ONIGERR_PARSER_BUG;
    864   else if (n == 1)
    865     return nums[0];
    866   else {
    867     if (IS_NOT_NULL(region)) {
    868       for (i = n - 1; i >= 0; i--) {
    869 	if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
    870 	  return nums[i];
    871       }
    872     }
    873     return nums[n - 1];
    874   }
    875 }
    876 
    877 #else /* USE_NAMED_GROUP */
    878 
    879 extern int
    880 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
    881 			   const UChar* name_end, int** nums)
    882 {
    883   return ONIG_NO_SUPPORT_CONFIG;
    884 }
    885 
    886 extern int
    887 onig_name_to_backref_number(regex_t* reg, const UChar* name,
    888 			    const UChar* name_end, OnigRegion* region)
    889 {
    890   return ONIG_NO_SUPPORT_CONFIG;
    891 }
    892 
    893 extern int
    894 onig_foreach_name(regex_t* reg,
    895   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
    896 {
    897   return ONIG_NO_SUPPORT_CONFIG;
    898 }
    899 
    900 extern int
    901 onig_number_of_names(regex_t* reg)
    902 {
    903   return 0;
    904 }
    905 #endif /* else USE_NAMED_GROUP */
    906 
    907 extern int
    908 onig_noname_group_capture_is_active(regex_t* reg)
    909 {
    910   if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
    911     return 0;
    912 
    913 #ifdef USE_NAMED_GROUP
    914   if (onig_number_of_names(reg) > 0 &&
    915       IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
    916       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
    917     return 0;
    918   }
    919 #endif
    920 
    921   return 1;
    922 }
    923 
    924 
    925 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
    926 
    927 static void
    928 scan_env_clear(ScanEnv* env)
    929 {
    930   int i;
    931 
    932   BIT_STATUS_CLEAR(env->capture_history);
    933   BIT_STATUS_CLEAR(env->bt_mem_start);
    934   BIT_STATUS_CLEAR(env->bt_mem_end);
    935   BIT_STATUS_CLEAR(env->backrefed_mem);
    936   env->error      = (UChar* )NULL;
    937   env->error_end  = (UChar* )NULL;
    938   env->num_call   = 0;
    939   env->num_mem    = 0;
    940 #ifdef USE_NAMED_GROUP
    941   env->num_named  = 0;
    942 #endif
    943   env->mem_alloc         = 0;
    944   env->mem_nodes_dynamic = (Node** )NULL;
    945 
    946   for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
    947     env->mem_nodes_static[i] = NULL_NODE;
    948 
    949 #ifdef USE_COMBINATION_EXPLOSION_CHECK
    950   env->num_comb_exp_check  = 0;
    951   env->comb_exp_max_regnum = 0;
    952   env->curr_max_regnum     = 0;
    953   env->has_recursion       = 0;
    954 #endif
    955 }
    956 
    957 static int
    958 scan_env_add_mem_entry(ScanEnv* env)
    959 {
    960   int i, need, alloc;
    961   Node** p;
    962 
    963   need = env->num_mem + 1;
    964   if (need >= SCANENV_MEMNODES_SIZE) {
    965     if (env->mem_alloc <= need) {
    966       if (IS_NULL(env->mem_nodes_dynamic)) {
    967 	alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
    968 	p = (Node** )xmalloc(sizeof(Node*) * alloc);
    969     CHECK_NULL_RETURN_MEMERR(p);
    970 
    971 	xmemcpy(p, env->mem_nodes_static,
    972 		sizeof(Node*) * SCANENV_MEMNODES_SIZE);
    973       }
    974       else {
    975 	alloc = env->mem_alloc * 2;
    976 	p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc, sizeof(Node*) * env->mem_alloc);
    977       }
    978       CHECK_NULL_RETURN_MEMERR(p);
    979 
    980       for (i = env->num_mem + 1; i < alloc; i++)
    981 	p[i] = NULL_NODE;
    982 
    983       env->mem_nodes_dynamic = p;
    984       env->mem_alloc = alloc;
    985     }
    986   }
    987 
    988   env->num_mem++;
    989   return env->num_mem;
    990 }
    991 
    992 static int
    993 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
    994 {
    995   if (env->num_mem >= num)
    996     SCANENV_MEM_NODES(env)[num] = node;
    997   else
    998     return ONIGERR_PARSER_BUG;
    999   return 0;
   1000 }
   1001 
   1002 
   1003 #ifdef USE_PARSE_TREE_NODE_RECYCLE
   1004 typedef struct _FreeNode {
   1005   struct _FreeNode* next;
   1006 } FreeNode;
   1007 
   1008 static FreeNode* FreeNodeList = (FreeNode* )NULL;
   1009 #endif
   1010 
   1011 extern void
   1012 onig_node_free(Node* node)
   1013 {
   1014  start:
   1015   if (IS_NULL(node)) return ;
   1016 
   1017   switch (NTYPE(node)) {
   1018   case NT_STR:
   1019     if (NSTR(node)->capa != 0 &&
   1020 	IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
   1021       xfree(NSTR(node)->s);
   1022     }
   1023     break;
   1024 
   1025   case NT_LIST:
   1026   case NT_ALT:
   1027     onig_node_free(NCAR(node));
   1028     {
   1029       Node* next_node = NCDR(node);
   1030 
   1031 #ifdef USE_PARSE_TREE_NODE_RECYCLE
   1032       {
   1033 	FreeNode* n = (FreeNode* )node;
   1034 
   1035         THREAD_ATOMIC_START;
   1036 	n->next = FreeNodeList;
   1037 	FreeNodeList = n;
   1038         THREAD_ATOMIC_END;
   1039       }
   1040 #else
   1041       xfree(node);
   1042 #endif
   1043       node = next_node;
   1044       goto start;
   1045     }
   1046     break;
   1047 
   1048   case NT_CCLASS:
   1049     {
   1050       CClassNode* cc = NCCLASS(node);
   1051 
   1052       if (IS_NCCLASS_SHARE(cc)) return ;
   1053       if (cc->mbuf)
   1054         bbuf_free(cc->mbuf);
   1055     }
   1056     break;
   1057 
   1058   case NT_QTFR:
   1059     if (NQTFR(node)->target)
   1060       onig_node_free(NQTFR(node)->target);
   1061     break;
   1062 
   1063   case NT_ENCLOSE:
   1064     if (NENCLOSE(node)->target)
   1065       onig_node_free(NENCLOSE(node)->target);
   1066     break;
   1067 
   1068   case NT_BREF:
   1069     if (IS_NOT_NULL(NBREF(node)->back_dynamic))
   1070       xfree(NBREF(node)->back_dynamic);
   1071     break;
   1072 
   1073   case NT_ANCHOR:
   1074     if (NANCHOR(node)->target)
   1075       onig_node_free(NANCHOR(node)->target);
   1076     break;
   1077   }
   1078 
   1079 #ifdef USE_PARSE_TREE_NODE_RECYCLE
   1080   {
   1081     FreeNode* n = (FreeNode* )node;
   1082 
   1083     THREAD_ATOMIC_START;
   1084     n->next = FreeNodeList;
   1085     FreeNodeList = n;
   1086     THREAD_ATOMIC_END;
   1087   }
   1088 #else
   1089   xfree(node);
   1090 #endif
   1091 }
   1092 
   1093 #ifdef USE_PARSE_TREE_NODE_RECYCLE
   1094 extern int
   1095 onig_free_node_list(void)
   1096 {
   1097   FreeNode* n;
   1098 
   1099   /* THREAD_ATOMIC_START; */
   1100   while (IS_NOT_NULL(FreeNodeList)) {
   1101     n = FreeNodeList;
   1102     FreeNodeList = FreeNodeList->next;
   1103     xfree(n);
   1104   }
   1105   /* THREAD_ATOMIC_END; */
   1106   return 0;
   1107 }
   1108 #endif
   1109 
   1110 static Node*
   1111 node_new(void)
   1112 {
   1113   Node* node;
   1114 
   1115 #ifdef USE_PARSE_TREE_NODE_RECYCLE
   1116   THREAD_ATOMIC_START;
   1117   if (IS_NOT_NULL(FreeNodeList)) {
   1118     node = (Node* )FreeNodeList;
   1119     FreeNodeList = FreeNodeList->next;
   1120     THREAD_ATOMIC_END;
   1121     return node;
   1122   }
   1123   THREAD_ATOMIC_END;
   1124 #endif
   1125 
   1126   node = (Node* )xmalloc(sizeof(Node));
   1127   /* xmemset(node, 0, sizeof(Node)); */
   1128   return node;
   1129 }
   1130 
   1131 
   1132 static void
   1133 initialize_cclass(CClassNode* cc)
   1134 {
   1135   BITSET_CLEAR(cc->bs);
   1136   /* cc->base.flags = 0; */
   1137   cc->flags = 0;
   1138   cc->mbuf  = NULL;
   1139 }
   1140 
   1141 static Node*
   1142 node_new_cclass(void)
   1143 {
   1144   Node* node = node_new();
   1145   CHECK_NULL_RETURN(node);
   1146 
   1147   SET_NTYPE(node, NT_CCLASS);
   1148   initialize_cclass(NCCLASS(node));
   1149   return node;
   1150 }
   1151 
   1152 static Node*
   1153 node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
   1154 				   const OnigCodePoint ranges[])
   1155 {
   1156   int n, i;
   1157   CClassNode* cc;
   1158   OnigCodePoint j;
   1159 
   1160   Node* node = node_new_cclass();
   1161   CHECK_NULL_RETURN(node);
   1162 
   1163   cc = NCCLASS(node);
   1164   if (not != 0) NCCLASS_SET_NOT(cc);
   1165 
   1166   BITSET_CLEAR(cc->bs);
   1167   if (sb_out > 0 && IS_NOT_NULL(ranges)) {
   1168     n = ONIGENC_CODE_RANGE_NUM(ranges);
   1169     for (i = 0; i < n; i++) {
   1170       for (j  = ONIGENC_CODE_RANGE_FROM(ranges, i);
   1171            j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
   1172 	if (j >= sb_out) goto sb_end;
   1173 
   1174         BITSET_SET_BIT(cc->bs, j);
   1175       }
   1176     }
   1177   }
   1178 
   1179  sb_end:
   1180   if (IS_NULL(ranges)) {
   1181   is_null:
   1182     cc->mbuf = NULL;
   1183   }
   1184   else {
   1185     BBuf* bbuf;
   1186 
   1187     n = ONIGENC_CODE_RANGE_NUM(ranges);
   1188     if (n == 0) goto is_null;
   1189 
   1190     bbuf = (BBuf* )xmalloc(sizeof(BBuf));
   1191     CHECK_NULL_RETURN(bbuf);
   1192     bbuf->alloc = n + 1;
   1193     bbuf->used  = n + 1;
   1194     bbuf->p     = (UChar* )((void* )ranges);
   1195 
   1196     cc->mbuf = bbuf;
   1197   }
   1198 
   1199   return node;
   1200 }
   1201 
   1202 static Node*
   1203 node_new_ctype(int type, int not)
   1204 {
   1205   Node* node = node_new();
   1206   CHECK_NULL_RETURN(node);
   1207 
   1208   SET_NTYPE(node, NT_CTYPE);
   1209   NCTYPE(node)->ctype = type;
   1210   NCTYPE(node)->not   = not;
   1211   return node;
   1212 }
   1213 
   1214 static Node*
   1215 node_new_anychar(void)
   1216 {
   1217   Node* node = node_new();
   1218   CHECK_NULL_RETURN(node);
   1219 
   1220   SET_NTYPE(node, NT_CANY);
   1221   return node;
   1222 }
   1223 
   1224 static Node*
   1225 node_new_list(Node* left, Node* right)
   1226 {
   1227   Node* node = node_new();
   1228   CHECK_NULL_RETURN(node);
   1229 
   1230   SET_NTYPE(node, NT_LIST);
   1231   NCAR(node)  = left;
   1232   NCDR(node) = right;
   1233   return node;
   1234 }
   1235 
   1236 extern Node*
   1237 onig_node_new_list(Node* left, Node* right)
   1238 {
   1239   return node_new_list(left, right);
   1240 }
   1241 
   1242 extern Node*
   1243 onig_node_list_add(Node* list, Node* x)
   1244 {
   1245   Node *n;
   1246 
   1247   n = onig_node_new_list(x, NULL);
   1248   if (IS_NULL(n)) return NULL_NODE;
   1249 
   1250   if (IS_NOT_NULL(list)) {
   1251     while (IS_NOT_NULL(NCDR(list)))
   1252       list = NCDR(list);
   1253 
   1254     NCDR(list) = n;
   1255   }
   1256 
   1257   return n;
   1258 }
   1259 
   1260 extern Node*
   1261 onig_node_new_alt(Node* left, Node* right)
   1262 {
   1263   Node* node = node_new();
   1264   CHECK_NULL_RETURN(node);
   1265 
   1266   SET_NTYPE(node, NT_ALT);
   1267   NCAR(node)  = left;
   1268   NCDR(node) = right;
   1269   return node;
   1270 }
   1271 
   1272 extern Node*
   1273 onig_node_new_anchor(int type)
   1274 {
   1275   Node* node = node_new();
   1276   CHECK_NULL_RETURN(node);
   1277 
   1278   SET_NTYPE(node, NT_ANCHOR);
   1279   NANCHOR(node)->type     = type;
   1280   NANCHOR(node)->target   = NULL;
   1281   NANCHOR(node)->char_len = -1;
   1282   return node;
   1283 }
   1284 
   1285 static Node*
   1286 node_new_backref(int back_num, int* backrefs, int by_name,
   1287 #ifdef USE_BACKREF_WITH_LEVEL
   1288 		 int exist_level, int nest_level,
   1289 #endif
   1290 		 ScanEnv* env)
   1291 {
   1292   int i;
   1293   Node* node = node_new();
   1294 
   1295   CHECK_NULL_RETURN(node);
   1296 
   1297   SET_NTYPE(node, NT_BREF);
   1298   NBREF(node)->state    = 0;
   1299   NBREF(node)->back_num = back_num;
   1300   NBREF(node)->back_dynamic = (int* )NULL;
   1301   if (by_name != 0)
   1302     NBREF(node)->state |= NST_NAME_REF;
   1303 
   1304 #ifdef USE_BACKREF_WITH_LEVEL
   1305   if (exist_level != 0) {
   1306     NBREF(node)->state |= NST_NEST_LEVEL;
   1307     NBREF(node)->nest_level  = nest_level;
   1308   }
   1309 #endif
   1310 
   1311   for (i = 0; i < back_num; i++) {
   1312     if (backrefs[i] <= env->num_mem &&
   1313 	IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
   1314       NBREF(node)->state |= NST_RECURSION;   /* /...(\1).../ */
   1315       break;
   1316     }
   1317   }
   1318 
   1319   if (back_num <= NODE_BACKREFS_SIZE) {
   1320     for (i = 0; i < back_num; i++)
   1321       NBREF(node)->back_static[i] = backrefs[i];
   1322   }
   1323   else {
   1324     int* p = (int* )xmalloc(sizeof(int) * back_num);
   1325     if (IS_NULL(p)) {
   1326       onig_node_free(node);
   1327       return NULL;
   1328     }
   1329     NBREF(node)->back_dynamic = p;
   1330     for (i = 0; i < back_num; i++)
   1331       p[i] = backrefs[i];
   1332   }
   1333   return node;
   1334 }
   1335 
   1336 #ifdef USE_SUBEXP_CALL
   1337 static Node*
   1338 node_new_call(UChar* name, UChar* name_end, int gnum)
   1339 {
   1340   Node* node = node_new();
   1341   CHECK_NULL_RETURN(node);
   1342 
   1343   SET_NTYPE(node, NT_CALL);
   1344   NCALL(node)->state     = 0;
   1345   NCALL(node)->target    = NULL_NODE;
   1346   NCALL(node)->name      = name;
   1347   NCALL(node)->name_end  = name_end;
   1348   NCALL(node)->group_num = gnum;  /* call by number if gnum != 0 */
   1349   return node;
   1350 }
   1351 #endif
   1352 
   1353 static Node*
   1354 node_new_quantifier(int lower, int upper, int by_number)
   1355 {
   1356   Node* node = node_new();
   1357   CHECK_NULL_RETURN(node);
   1358 
   1359   SET_NTYPE(node, NT_QTFR);
   1360   NQTFR(node)->state  = 0;
   1361   NQTFR(node)->target = NULL;
   1362   NQTFR(node)->lower  = lower;
   1363   NQTFR(node)->upper  = upper;
   1364   NQTFR(node)->greedy = 1;
   1365   NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
   1366   NQTFR(node)->head_exact        = NULL_NODE;
   1367   NQTFR(node)->next_head_exact   = NULL_NODE;
   1368   NQTFR(node)->is_refered        = 0;
   1369   if (by_number != 0)
   1370     NQTFR(node)->state |= NST_BY_NUMBER;
   1371 
   1372 #ifdef USE_COMBINATION_EXPLOSION_CHECK
   1373   NQTFR(node)->comb_exp_check_num = 0;
   1374 #endif
   1375 
   1376   return node;
   1377 }
   1378 
   1379 static Node*
   1380 node_new_enclose(int type)
   1381 {
   1382   Node* node = node_new();
   1383   CHECK_NULL_RETURN(node);
   1384 
   1385   SET_NTYPE(node, NT_ENCLOSE);
   1386   NENCLOSE(node)->type      = type;
   1387   NENCLOSE(node)->state     =  0;
   1388   NENCLOSE(node)->regnum    =  0;
   1389   NENCLOSE(node)->option    =  0;
   1390   NENCLOSE(node)->target    = NULL;
   1391   NENCLOSE(node)->call_addr = -1;
   1392   NENCLOSE(node)->opt_count =  0;
   1393   return node;
   1394 }
   1395 
   1396 extern Node*
   1397 onig_node_new_enclose(int type)
   1398 {
   1399   return node_new_enclose(type);
   1400 }
   1401 
   1402 static Node*
   1403 node_new_enclose_memory(OnigOptionType option, int is_named)
   1404 {
   1405   Node* node = node_new_enclose(ENCLOSE_MEMORY);
   1406   CHECK_NULL_RETURN(node);
   1407   if (is_named != 0)
   1408     SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
   1409 
   1410 #ifdef USE_SUBEXP_CALL
   1411   NENCLOSE(node)->option = option;
   1412 #endif
   1413   return node;
   1414 }
   1415 
   1416 static Node*
   1417 node_new_option(OnigOptionType option)
   1418 {
   1419   Node* node = node_new_enclose(ENCLOSE_OPTION);
   1420   CHECK_NULL_RETURN(node);
   1421   NENCLOSE(node)->option = option;
   1422   return node;
   1423 }
   1424 
   1425 extern int
   1426 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
   1427 {
   1428   int addlen = (int)(end - s);
   1429 
   1430   if (addlen > 0) {
   1431     int len  = (int)(NSTR(node)->end - NSTR(node)->s);
   1432 
   1433     if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
   1434       UChar* p;
   1435       int capa = len + addlen + NODE_STR_MARGIN;
   1436 
   1437       if (capa <= NSTR(node)->capa) {
   1438 	onig_strcpy(NSTR(node)->s + len, s, end);
   1439       }
   1440       else {
   1441 	if (NSTR(node)->s == NSTR(node)->buf)
   1442 	  p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
   1443 				      s, end, capa);
   1444 	else
   1445 	  p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa, NSTR(node)->capa);
   1446 
   1447 	CHECK_NULL_RETURN_MEMERR(p);
   1448 	NSTR(node)->s    = p;
   1449 	NSTR(node)->capa = capa;
   1450       }
   1451     }
   1452     else {
   1453       onig_strcpy(NSTR(node)->s + len, s, end);
   1454     }
   1455     NSTR(node)->end = NSTR(node)->s + len + addlen;
   1456   }
   1457 
   1458   return 0;
   1459 }
   1460 
   1461 extern int
   1462 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
   1463 {
   1464   onig_node_str_clear(node);
   1465   return onig_node_str_cat(node, s, end);
   1466 }
   1467 
   1468 static int
   1469 node_str_cat_char(Node* node, UChar c)
   1470 {
   1471   UChar s[1];
   1472 
   1473   s[0] = c;
   1474   return onig_node_str_cat(node, s, s + 1);
   1475 }
   1476 
   1477 extern void
   1478 onig_node_conv_to_str_node(Node* node, int flag)
   1479 {
   1480   SET_NTYPE(node, NT_STR);
   1481   NSTR(node)->flag = flag;
   1482   NSTR(node)->capa = 0;
   1483   NSTR(node)->s    = NSTR(node)->buf;
   1484   NSTR(node)->end  = NSTR(node)->buf;
   1485 }
   1486 
   1487 extern void
   1488 onig_node_str_clear(Node* node)
   1489 {
   1490   if (NSTR(node)->capa != 0 &&
   1491       IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
   1492     xfree(NSTR(node)->s);
   1493   }
   1494 
   1495   NSTR(node)->capa = 0;
   1496   NSTR(node)->flag = 0;
   1497   NSTR(node)->s    = NSTR(node)->buf;
   1498   NSTR(node)->end  = NSTR(node)->buf;
   1499 }
   1500 
   1501 static Node*
   1502 node_new_str(const UChar* s, const UChar* end)
   1503 {
   1504   Node* node = node_new();
   1505   CHECK_NULL_RETURN(node);
   1506 
   1507   SET_NTYPE(node, NT_STR);
   1508   NSTR(node)->capa = 0;
   1509   NSTR(node)->flag = 0;
   1510   NSTR(node)->s    = NSTR(node)->buf;
   1511   NSTR(node)->end  = NSTR(node)->buf;
   1512   if (onig_node_str_cat(node, s, end)) {
   1513     onig_node_free(node);
   1514     return NULL;
   1515   }
   1516   return node;
   1517 }
   1518 
   1519 extern Node*
   1520 onig_node_new_str(const UChar* s, const UChar* end)
   1521 {
   1522   return node_new_str(s, end);
   1523 }
   1524 
   1525 static Node*
   1526 node_new_str_raw(UChar* s, UChar* end)
   1527 {
   1528   Node* node = node_new_str(s, end);
   1529   CHECK_NULL_RETURN(node);
   1530   NSTRING_SET_RAW(node);
   1531   return node;
   1532 }
   1533 
   1534 static Node*
   1535 node_new_empty(void)
   1536 {
   1537   return node_new_str(NULL, NULL);
   1538 }
   1539 
   1540 static Node*
   1541 node_new_str_raw_char(UChar c)
   1542 {
   1543   UChar p[1];
   1544 
   1545   p[0] = c;
   1546   return node_new_str_raw(p, p + 1);
   1547 }
   1548 
   1549 static Node*
   1550 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
   1551 {
   1552   const UChar *p;
   1553   Node* n = NULL_NODE;
   1554 
   1555   if (sn->end > sn->s) {
   1556     p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
   1557     if (p && p > sn->s) { /* can be splitted. */
   1558       n = node_new_str(p, sn->end);
   1559       CHECK_NULL_RETURN(n);
   1560       if ((sn->flag & NSTR_RAW) != 0)
   1561 	NSTRING_SET_RAW(n);
   1562       sn->end = (UChar* )p;
   1563     }
   1564   }
   1565   return n;
   1566 }
   1567 
   1568 static int
   1569 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
   1570 {
   1571   if (sn->end > sn->s) {
   1572     return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
   1573   }
   1574   return 0;
   1575 }
   1576 
   1577 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
   1578 static int
   1579 node_str_head_pad(StrNode* sn, int num, UChar val)
   1580 {
   1581   UChar buf[NODE_STR_BUF_SIZE];
   1582   int i, len;
   1583 
   1584   len = sn->end - sn->s;
   1585   onig_strcpy(buf, sn->s, sn->end);
   1586   onig_strcpy(&(sn->s[num]), buf, buf + len);
   1587   sn->end += num;
   1588 
   1589   for (i = 0; i < num; i++) {
   1590     sn->s[i] = val;
   1591   }
   1592 }
   1593 #endif
   1594 
   1595 extern int
   1596 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
   1597 {
   1598   unsigned int num, val;
   1599   OnigCodePoint c;
   1600   UChar* p = *src;
   1601   PFETCH_READY;
   1602 
   1603   num = 0;
   1604   while (!PEND) {
   1605     PFETCH(c);
   1606     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
   1607       val = (unsigned int )DIGITVAL(c);
   1608       if ((INT_MAX_LIMIT - val) / 10UL < num)
   1609 	return -1;  /* overflow */
   1610 
   1611       num = num * 10 + val;
   1612     }
   1613     else {
   1614       PUNFETCH;
   1615       break;
   1616     }
   1617   }
   1618   *src = p;
   1619   return num;
   1620 }
   1621 
   1622 static int
   1623 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
   1624 				 OnigEncoding enc)
   1625 {
   1626   OnigCodePoint c;
   1627   unsigned int num, val;
   1628   UChar* p = *src;
   1629   PFETCH_READY;
   1630 
   1631   num = 0;
   1632   while (!PEND && maxlen-- != 0) {
   1633     PFETCH(c);
   1634     if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
   1635       val = (unsigned int )XDIGITVAL(enc,c);
   1636       if ((INT_MAX_LIMIT - val) / 16UL < num)
   1637 	return -1;  /* overflow */
   1638 
   1639       num = (num << 4) + XDIGITVAL(enc,c);
   1640     }
   1641     else {
   1642       PUNFETCH;
   1643       break;
   1644     }
   1645   }
   1646   *src = p;
   1647   return num;
   1648 }
   1649 
   1650 static int
   1651 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
   1652 			   OnigEncoding enc)
   1653 {
   1654   OnigCodePoint c;
   1655   unsigned int num, val;
   1656   UChar* p = *src;
   1657   PFETCH_READY;
   1658 
   1659   num = 0;
   1660   while (!PEND && maxlen-- != 0) {
   1661     PFETCH(c);
   1662     if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
   1663       val = ODIGITVAL(c);
   1664       if ((INT_MAX_LIMIT - val) / 8UL < num)
   1665 	return -1;  /* overflow */
   1666 
   1667       num = (num << 3) + val;
   1668     }
   1669     else {
   1670       PUNFETCH;
   1671       break;
   1672     }
   1673   }
   1674   *src = p;
   1675   return num;
   1676 }
   1677 
   1678 
   1679 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
   1680     BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
   1681 
   1682 /* data format:
   1683      [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
   1684      (all data size is OnigCodePoint)
   1685  */
   1686 static int
   1687 new_code_range(BBuf** pbuf)
   1688 {
   1689 #define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
   1690   int r;
   1691   OnigCodePoint n;
   1692   BBuf* bbuf;
   1693 
   1694   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
   1695   CHECK_NULL_RETURN_MEMERR(*pbuf);
   1696   r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
   1697   if (r) return r;
   1698 
   1699   n = 0;
   1700   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
   1701   return 0;
   1702 }
   1703 
   1704 static int
   1705 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
   1706 {
   1707   int r, inc_n, pos;
   1708   int low, high, bound, x;
   1709   OnigCodePoint n, *data;
   1710   BBuf* bbuf;
   1711 
   1712   if (from > to) {
   1713     n = from; from = to; to = n;
   1714   }
   1715 
   1716   if (IS_NULL(*pbuf)) {
   1717     r = new_code_range(pbuf);
   1718     if (r) return r;
   1719     bbuf = *pbuf;
   1720     n = 0;
   1721   }
   1722   else {
   1723     bbuf = *pbuf;
   1724     GET_CODE_POINT(n, bbuf->p);
   1725   }
   1726   data = (OnigCodePoint* )(bbuf->p);
   1727   data++;
   1728 
   1729   for (low = 0, bound = n; low < bound; ) {
   1730     x = (low + bound) >> 1;
   1731     if (from > data[x*2 + 1])
   1732       low = x + 1;
   1733     else
   1734       bound = x;
   1735   }
   1736 
   1737   for (high = low, bound = n; high < bound; ) {
   1738     x = (high + bound) >> 1;
   1739     if (to >= data[x*2] - 1)
   1740       high = x + 1;
   1741     else
   1742       bound = x;
   1743   }
   1744 
   1745   inc_n = low + 1 - high;
   1746   if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
   1747     return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
   1748 
   1749   if (inc_n != 1) {
   1750     if (from > data[low*2])
   1751       from = data[low*2];
   1752     if (to < data[(high - 1)*2 + 1])
   1753       to = data[(high - 1)*2 + 1];
   1754   }
   1755 
   1756   if (inc_n != 0 && (OnigCodePoint )high < n) {
   1757     int from_pos = SIZE_CODE_POINT * (1 + high * 2);
   1758     int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
   1759     int size = (n - high) * 2 * SIZE_CODE_POINT;
   1760 
   1761     if (inc_n > 0) {
   1762       BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
   1763     }
   1764     else {
   1765       BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
   1766     }
   1767   }
   1768 
   1769   pos = SIZE_CODE_POINT * (1 + low * 2);
   1770   BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
   1771   BBUF_WRITE_CODE_POINT(bbuf, pos, from);
   1772   BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
   1773   n += inc_n;
   1774   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
   1775 
   1776   return 0;
   1777 }
   1778 
   1779 static int
   1780 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
   1781 {
   1782   if (from > to) {
   1783     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
   1784       return 0;
   1785     else
   1786       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
   1787   }
   1788 
   1789   return add_code_range_to_buf(pbuf, from, to);
   1790 }
   1791 
   1792 static int
   1793 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
   1794 {
   1795   int r, i, n;
   1796   OnigCodePoint pre, from, *data, to = 0;
   1797 
   1798   *pbuf = (BBuf* )NULL;
   1799   if (IS_NULL(bbuf)) {
   1800   set_all:
   1801     return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
   1802   }
   1803 
   1804   data = (OnigCodePoint* )(bbuf->p);
   1805   GET_CODE_POINT(n, data);
   1806   data++;
   1807   if (n <= 0) goto set_all;
   1808 
   1809   r = 0;
   1810   pre = MBCODE_START_POS(enc);
   1811   for (i = 0; i < n; i++) {
   1812     from = data[i*2];
   1813     to   = data[i*2+1];
   1814     if (pre <= from - 1) {
   1815       r = add_code_range_to_buf(pbuf, pre, from - 1);
   1816       if (r != 0) return r;
   1817     }
   1818     if (to == ~((OnigCodePoint )0)) break;
   1819     pre = to + 1;
   1820   }
   1821   if (to < ~((OnigCodePoint )0)) {
   1822     r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
   1823   }
   1824   return r;
   1825 }
   1826 
   1827 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
   1828   BBuf *tbuf; \
   1829   int  tnot; \
   1830   tnot = not1;  not1  = not2;  not2  = tnot; \
   1831   tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
   1832 } while (0)
   1833 
   1834 static int
   1835 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
   1836                   BBuf* bbuf2, int not2, BBuf** pbuf)
   1837 {
   1838   int r;
   1839   OnigCodePoint i, n1, *data1;
   1840   OnigCodePoint from, to;
   1841 
   1842   *pbuf = (BBuf* )NULL;
   1843   if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
   1844     if (not1 != 0 || not2 != 0)
   1845       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
   1846     return 0;
   1847   }
   1848 
   1849   r = 0;
   1850   if (IS_NULL(bbuf2))
   1851     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
   1852 
   1853   if (IS_NULL(bbuf1)) {
   1854     if (not1 != 0) {
   1855       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
   1856     }
   1857     else {
   1858       if (not2 == 0) {
   1859 	return bbuf_clone(pbuf, bbuf2);
   1860       }
   1861       else {
   1862 	return not_code_range_buf(enc, bbuf2, pbuf);
   1863       }
   1864     }
   1865   }
   1866 
   1867   if (not1 != 0)
   1868     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
   1869 
   1870   data1 = (OnigCodePoint* )(bbuf1->p);
   1871   GET_CODE_POINT(n1, data1);
   1872   data1++;
   1873 
   1874   if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
   1875     r = bbuf_clone(pbuf, bbuf2);
   1876   }
   1877   else if (not1 == 0) { /* 1 OR (not 2) */
   1878     r = not_code_range_buf(enc, bbuf2, pbuf);
   1879   }
   1880   if (r != 0) return r;
   1881 
   1882   for (i = 0; i < n1; i++) {
   1883     from = data1[i*2];
   1884     to   = data1[i*2+1];
   1885     r = add_code_range_to_buf(pbuf, from, to);
   1886     if (r != 0) return r;
   1887   }
   1888   return 0;
   1889 }
   1890 
   1891 static int
   1892 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
   1893 	        OnigCodePoint* data, int n)
   1894 {
   1895   int i, r;
   1896   OnigCodePoint from2, to2;
   1897 
   1898   for (i = 0; i < n; i++) {
   1899     from2 = data[i*2];
   1900     to2   = data[i*2+1];
   1901     if (from2 < from1) {
   1902       if (to2 < from1) continue;
   1903       else {
   1904 	from1 = to2 + 1;
   1905       }
   1906     }
   1907     else if (from2 <= to1) {
   1908       if (to2 < to1) {
   1909 	if (from1 <= from2 - 1) {
   1910 	  r = add_code_range_to_buf(pbuf, from1, from2-1);
   1911 	  if (r != 0) return r;
   1912 	}
   1913 	from1 = to2 + 1;
   1914       }
   1915       else {
   1916 	to1 = from2 - 1;
   1917       }
   1918     }
   1919     else {
   1920       from1 = from2;
   1921     }
   1922     if (from1 > to1) break;
   1923   }
   1924   if (from1 <= to1) {
   1925     r = add_code_range_to_buf(pbuf, from1, to1);
   1926     if (r != 0) return r;
   1927   }
   1928   return 0;
   1929 }
   1930 
   1931 static int
   1932 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
   1933 {
   1934   int r;
   1935   OnigCodePoint i, j, n1, n2, *data1, *data2;
   1936   OnigCodePoint from, to, from1, to1, from2, to2;
   1937 
   1938   *pbuf = (BBuf* )NULL;
   1939   if (IS_NULL(bbuf1)) {
   1940     if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
   1941       return bbuf_clone(pbuf, bbuf2);
   1942     return 0;
   1943   }
   1944   else if (IS_NULL(bbuf2)) {
   1945     if (not2 != 0)
   1946       return bbuf_clone(pbuf, bbuf1);
   1947     return 0;
   1948   }
   1949 
   1950   if (not1 != 0)
   1951     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
   1952 
   1953   data1 = (OnigCodePoint* )(bbuf1->p);
   1954   data2 = (OnigCodePoint* )(bbuf2->p);
   1955   GET_CODE_POINT(n1, data1);
   1956   GET_CODE_POINT(n2, data2);
   1957   data1++;
   1958   data2++;
   1959 
   1960   if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
   1961     for (i = 0; i < n1; i++) {
   1962       from1 = data1[i*2];
   1963       to1   = data1[i*2+1];
   1964       for (j = 0; j < n2; j++) {
   1965 	from2 = data2[j*2];
   1966 	to2   = data2[j*2+1];
   1967 	if (from2 > to1) break;
   1968 	if (to2 < from1) continue;
   1969 	from = MAX(from1, from2);
   1970 	to   = MIN(to1, to2);
   1971 	r = add_code_range_to_buf(pbuf, from, to);
   1972 	if (r != 0) return r;
   1973       }
   1974     }
   1975   }
   1976   else if (not1 == 0) { /* 1 AND (not 2) */
   1977     for (i = 0; i < n1; i++) {
   1978       from1 = data1[i*2];
   1979       to1   = data1[i*2+1];
   1980       r = and_code_range1(pbuf, from1, to1, data2, n2);
   1981       if (r != 0) return r;
   1982     }
   1983   }
   1984 
   1985   return 0;
   1986 }
   1987 
   1988 static int
   1989 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
   1990 {
   1991   int r, not1, not2;
   1992   BBuf *buf1, *buf2, *pbuf;
   1993   BitSetRef bsr1, bsr2;
   1994   BitSet bs1, bs2;
   1995 
   1996   not1 = IS_NCCLASS_NOT(dest);
   1997   bsr1 = dest->bs;
   1998   buf1 = dest->mbuf;
   1999   not2 = IS_NCCLASS_NOT(cc);
   2000   bsr2 = cc->bs;
   2001   buf2 = cc->mbuf;
   2002 
   2003   if (not1 != 0) {
   2004     bitset_invert_to(bsr1, bs1);
   2005     bsr1 = bs1;
   2006   }
   2007   if (not2 != 0) {
   2008     bitset_invert_to(bsr2, bs2);
   2009     bsr2 = bs2;
   2010   }
   2011   bitset_and(bsr1, bsr2);
   2012   if (bsr1 != dest->bs) {
   2013     bitset_copy(dest->bs, bsr1);
   2014     bsr1 = dest->bs;
   2015   }
   2016   if (not1 != 0) {
   2017     bitset_invert(dest->bs);
   2018   }
   2019 
   2020   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
   2021     if (not1 != 0 && not2 != 0) {
   2022       r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
   2023     }
   2024     else {
   2025       r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
   2026       if (r == 0 && not1 != 0) {
   2027 	BBuf *tbuf;
   2028 	r = not_code_range_buf(enc, pbuf, &tbuf);
   2029 	if (r != 0) {
   2030 	  bbuf_free(pbuf);
   2031 	  return r;
   2032 	}
   2033 	bbuf_free(pbuf);
   2034 	pbuf = tbuf;
   2035       }
   2036     }
   2037     if (r != 0) return r;
   2038 
   2039     dest->mbuf = pbuf;
   2040     bbuf_free(buf1);
   2041     return r;
   2042   }
   2043   return 0;
   2044 }
   2045 
   2046 static int
   2047 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
   2048 {
   2049   int r, not1, not2;
   2050   BBuf *buf1, *buf2, *pbuf;
   2051   BitSetRef bsr1, bsr2;
   2052   BitSet bs1, bs2;
   2053 
   2054   not1 = IS_NCCLASS_NOT(dest);
   2055   bsr1 = dest->bs;
   2056   buf1 = dest->mbuf;
   2057   not2 = IS_NCCLASS_NOT(cc);
   2058   bsr2 = cc->bs;
   2059   buf2 = cc->mbuf;
   2060 
   2061   if (not1 != 0) {
   2062     bitset_invert_to(bsr1, bs1);
   2063     bsr1 = bs1;
   2064   }
   2065   if (not2 != 0) {
   2066     bitset_invert_to(bsr2, bs2);
   2067     bsr2 = bs2;
   2068   }
   2069   bitset_or(bsr1, bsr2);
   2070   if (bsr1 != dest->bs) {
   2071     bitset_copy(dest->bs, bsr1);
   2072     bsr1 = dest->bs;
   2073   }
   2074   if (not1 != 0) {
   2075     bitset_invert(dest->bs);
   2076   }
   2077 
   2078   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
   2079     if (not1 != 0 && not2 != 0) {
   2080       r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
   2081     }
   2082     else {
   2083       r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
   2084       if (r == 0 && not1 != 0) {
   2085 	BBuf *tbuf;
   2086 	r = not_code_range_buf(enc, pbuf, &tbuf);
   2087 	if (r != 0) {
   2088 	  bbuf_free(pbuf);
   2089 	  return r;
   2090 	}
   2091 	bbuf_free(pbuf);
   2092 	pbuf = tbuf;
   2093       }
   2094     }
   2095     if (r != 0) return r;
   2096 
   2097     dest->mbuf = pbuf;
   2098     bbuf_free(buf1);
   2099     return r;
   2100   }
   2101   else
   2102     return 0;
   2103 }
   2104 
   2105 static int
   2106 conv_backslash_value(int c, ScanEnv* env)
   2107 {
   2108   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
   2109     switch (c) {
   2110     case 'n': return '\n';
   2111     case 't': return '\t';
   2112     case 'r': return '\r';
   2113     case 'f': return '\f';
   2114     case 'a': return '\007';
   2115     case 'b': return '\010';
   2116     case 'e': return '\033';
   2117     case 'v':
   2118       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
   2119 	return '\v';
   2120       break;
   2121 
   2122     default:
   2123       break;
   2124     }
   2125   }
   2126   return c;
   2127 }
   2128 
   2129 static int
   2130 is_invalid_quantifier_target(Node* node)
   2131 {
   2132   switch (NTYPE(node)) {
   2133   case NT_ANCHOR:
   2134     return 1;
   2135     break;
   2136 
   2137   case NT_ENCLOSE:
   2138     /* allow enclosed elements */
   2139     /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
   2140     break;
   2141 
   2142   case NT_LIST:
   2143     do {
   2144       if (! is_invalid_quantifier_target(NCAR(node))) return 0;
   2145     } while (IS_NOT_NULL(node = NCDR(node)));
   2146     return 0;
   2147     break;
   2148 
   2149   case NT_ALT:
   2150     do {
   2151       if (is_invalid_quantifier_target(NCAR(node))) return 1;
   2152     } while (IS_NOT_NULL(node = NCDR(node)));
   2153     break;
   2154 
   2155   default:
   2156     break;
   2157   }
   2158   return 0;
   2159 }
   2160 
   2161 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
   2162 static int
   2163 popular_quantifier_num(QtfrNode* q)
   2164 {
   2165   if (q->greedy) {
   2166     if (q->lower == 0) {
   2167       if (q->upper == 1) return 0;
   2168       else if (IS_REPEAT_INFINITE(q->upper)) return 1;
   2169     }
   2170     else if (q->lower == 1) {
   2171       if (IS_REPEAT_INFINITE(q->upper)) return 2;
   2172     }
   2173   }
   2174   else {
   2175     if (q->lower == 0) {
   2176       if (q->upper == 1) return 3;
   2177       else if (IS_REPEAT_INFINITE(q->upper)) return 4;
   2178     }
   2179     else if (q->lower == 1) {
   2180       if (IS_REPEAT_INFINITE(q->upper)) return 5;
   2181     }
   2182   }
   2183   return -1;
   2184 }
   2185 
   2186 
   2187 enum ReduceType {
   2188   RQ_ASIS = 0, /* as is */
   2189   RQ_DEL  = 1, /* delete parent */
   2190   RQ_A,        /* to '*'    */
   2191   RQ_AQ,       /* to '*?'   */
   2192   RQ_QQ,       /* to '??'   */
   2193   RQ_P_QQ,     /* to '+)??' */
   2194   RQ_PQ_Q      /* to '+?)?' */
   2195 };
   2196 
   2197 static enum ReduceType ReduceTypeTable[6][6] = {
   2198   {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
   2199   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
   2200   {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
   2201   {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
   2202   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
   2203   {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
   2204 };
   2205 
   2206 extern void
   2207 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
   2208 {
   2209   int pnum, cnum;
   2210   QtfrNode *p, *c;
   2211 
   2212   p = NQTFR(pnode);
   2213   c = NQTFR(cnode);
   2214   pnum = popular_quantifier_num(p);
   2215   cnum = popular_quantifier_num(c);
   2216   if (pnum < 0 || cnum < 0) return ;
   2217 
   2218   switch(ReduceTypeTable[cnum][pnum]) {
   2219   case RQ_DEL:
   2220     CopyMem (pnode, cnode, sizeof (Node));
   2221     break;
   2222   case RQ_A:
   2223     p->target = c->target;
   2224     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;
   2225     break;
   2226   case RQ_AQ:
   2227     p->target = c->target;
   2228     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;
   2229     break;
   2230   case RQ_QQ:
   2231     p->target = c->target;
   2232     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
   2233     break;
   2234   case RQ_P_QQ:
   2235     p->target = cnode;
   2236     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
   2237     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;
   2238     return ;
   2239     break;
   2240   case RQ_PQ_Q:
   2241     p->target = cnode;
   2242     p->lower  = 0;  p->upper = 1;  p->greedy = 1;
   2243     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;
   2244     return ;
   2245     break;
   2246   case RQ_ASIS:
   2247     p->target = cnode;
   2248     return ;
   2249     break;
   2250   }
   2251 
   2252   c->target = NULL_NODE;
   2253   onig_node_free(cnode);
   2254 }
   2255 
   2256 
   2257 enum TokenSyms {
   2258   TK_EOT      = 0,   /* end of token */
   2259   TK_RAW_BYTE = 1,
   2260   TK_CHAR,
   2261   TK_STRING,
   2262   TK_CODE_POINT,
   2263   TK_ANYCHAR,
   2264   TK_CHAR_TYPE,
   2265   TK_BACKREF,
   2266   TK_CALL,
   2267   TK_ANCHOR,
   2268   TK_OP_REPEAT,
   2269   TK_INTERVAL,
   2270   TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
   2271   TK_ALT,
   2272   TK_SUBEXP_OPEN,
   2273   TK_SUBEXP_CLOSE,
   2274   TK_CC_OPEN,
   2275   TK_QUOTE_OPEN,
   2276   TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
   2277   /* in cc */
   2278   TK_CC_CLOSE,
   2279   TK_CC_RANGE,
   2280   TK_POSIX_BRACKET_OPEN,
   2281   TK_CC_AND,             /* && */
   2282   TK_CC_CC_OPEN          /* [ */
   2283 };
   2284 
   2285 typedef struct {
   2286   enum TokenSyms type;
   2287   int escaped;
   2288   int base;   /* is number: 8, 16 (used in [....]) */
   2289   UChar* backp;
   2290   union {
   2291     UChar* s;
   2292     int   c;
   2293     OnigCodePoint code;
   2294     int   anchor;
   2295     int   subtype;
   2296     struct {
   2297       int lower;
   2298       int upper;
   2299       int greedy;
   2300       int possessive;
   2301     } repeat;
   2302     struct {
   2303       int  num;
   2304       int  ref1;
   2305       int* refs;
   2306       int  by_name;
   2307 #ifdef USE_BACKREF_WITH_LEVEL
   2308       int  exist_level;
   2309       int  level;   /* \k<name+n> */
   2310 #endif
   2311     } backref;
   2312     struct {
   2313       UChar* name;
   2314       UChar* name_end;
   2315       int    gnum;
   2316     } call;
   2317     struct {
   2318       int ctype;
   2319       int not;
   2320     } prop;
   2321   } u;
   2322 } OnigToken;
   2323 
   2324 
   2325 static int
   2326 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
   2327 {
   2328   int low, up, syn_allow, non_low = 0;
   2329   int r = 0;
   2330   OnigCodePoint c;
   2331   OnigEncoding enc = env->enc;
   2332   UChar* p = *src;
   2333   PFETCH_READY;
   2334 
   2335   syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
   2336 
   2337   if (PEND) {
   2338     if (syn_allow)
   2339       return 1;  /* "....{" : OK! */
   2340     else
   2341       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
   2342   }
   2343 
   2344   if (! syn_allow) {
   2345     c = PPEEK;
   2346     if (c == ')' || c == '(' || c == '|') {
   2347       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
   2348     }
   2349   }
   2350 
   2351   low = onig_scan_unsigned_number(&p, end, env->enc);
   2352   if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
   2353   if (low > ONIG_MAX_REPEAT_NUM)
   2354     return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
   2355 
   2356   if (p == *src) { /* can't read low */
   2357     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
   2358       /* allow {,n} as {0,n} */
   2359       low = 0;
   2360       non_low = 1;
   2361     }
   2362     else
   2363       goto invalid;
   2364   }
   2365 
   2366   if (PEND) goto invalid;
   2367   PFETCH(c);
   2368   if (c == ',') {
   2369     UChar* prev = p;
   2370     up = onig_scan_unsigned_number(&p, end, env->enc);
   2371     if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
   2372     if (up > ONIG_MAX_REPEAT_NUM)
   2373       return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
   2374 
   2375     if (p == prev) {
   2376       if (non_low != 0)
   2377 	goto invalid;
   2378       up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
   2379     }
   2380   }
   2381   else {
   2382     if (non_low != 0)
   2383       goto invalid;
   2384 
   2385     PUNFETCH;
   2386     up = low;  /* {n} : exact n times */
   2387     r = 2;     /* fixed */
   2388   }
   2389 
   2390   if (PEND) goto invalid;
   2391   PFETCH(c);
   2392   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
   2393     if (c != MC_ESC(env->syntax)) goto invalid;
   2394     PFETCH(c);
   2395   }
   2396   if (c != '}') goto invalid;
   2397 
   2398   if (!IS_REPEAT_INFINITE(up) && low > up) {
   2399     return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
   2400   }
   2401 
   2402   tok->type = TK_INTERVAL;
   2403   tok->u.repeat.lower = low;
   2404   tok->u.repeat.upper = up;
   2405   *src = p;
   2406   return r; /* 0: normal {n,m}, 2: fixed {n} */
   2407 
   2408  invalid:
   2409   if (syn_allow)
   2410     return 1;  /* OK */
   2411   else
   2412     return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
   2413 }
   2414 
   2415 /* \M-, \C-, \c, or \... */
   2416 static int
   2417 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
   2418 {
   2419   int v;
   2420   OnigCodePoint c;
   2421   OnigEncoding enc = env->enc;
   2422   UChar* p = *src;
   2423 
   2424   if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
   2425 
   2426   PFETCH_S(c);
   2427   switch (c) {
   2428   case 'M':
   2429     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
   2430       if (PEND) return ONIGERR_END_PATTERN_AT_META;
   2431       PFETCH_S(c);
   2432       if (c != '-') return ONIGERR_META_CODE_SYNTAX;
   2433       if (PEND) return ONIGERR_END_PATTERN_AT_META;
   2434       PFETCH_S(c);
   2435       if (c == MC_ESC(env->syntax)) {
   2436         v = fetch_escaped_value(&p, end, env);
   2437         if (v < 0) return v;
   2438         c = (OnigCodePoint )v;
   2439       }
   2440       c = ((c & 0xff) | 0x80);
   2441     }
   2442     else
   2443       goto backslash;
   2444     break;
   2445 
   2446   case 'C':
   2447     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
   2448       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
   2449       PFETCH_S(c);
   2450       if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
   2451       goto control;
   2452     }
   2453     else
   2454       goto backslash;
   2455 
   2456   case 'c':
   2457     if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
   2458     control:
   2459       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
   2460       PFETCH_S(c);
   2461       if (c == '?') {
   2462         c = 0177;
   2463       }
   2464       else {
   2465         if (c == MC_ESC(env->syntax)) {
   2466           v = fetch_escaped_value(&p, end, env);
   2467           if (v < 0) return v;
   2468           c = (OnigCodePoint )v;
   2469         }
   2470         c &= 0x9f;
   2471       }
   2472       break;
   2473     }
   2474     /* fall through */
   2475 
   2476   default:
   2477     {
   2478     backslash:
   2479       c = conv_backslash_value(c, env);
   2480     }
   2481     break;
   2482   }
   2483 
   2484   *src = p;
   2485   return c;
   2486 }
   2487 
   2488 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
   2489 
   2490 static OnigCodePoint
   2491 get_name_end_code_point(OnigCodePoint start)
   2492 {
   2493   switch (start) {
   2494   case '<':  return (OnigCodePoint )'>'; break;
   2495   case '\'': return (OnigCodePoint )'\''; break;
   2496   default:
   2497     break;
   2498   }
   2499 
   2500   return (OnigCodePoint )0;
   2501 }
   2502 
   2503 #ifdef USE_NAMED_GROUP
   2504 #ifdef USE_BACKREF_WITH_LEVEL
   2505 /*
   2506    \k<name+n>, \k<name-n>
   2507    \k<num+n>,  \k<num-n>
   2508    \k<-num+n>, \k<-num-n>
   2509 */
   2510 static int
   2511 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
   2512 		      UChar** rname_end, ScanEnv* env,
   2513 		      int* rback_num, int* rlevel)
   2514 {
   2515   int r, sign, is_num, exist_level;
   2516   OnigCodePoint end_code;
   2517   OnigCodePoint c = 0;
   2518   OnigEncoding enc = env->enc;
   2519   UChar *name_end;
   2520   UChar *pnum_head;
   2521   UChar *p = *src;
   2522   PFETCH_READY;
   2523 
   2524   *rback_num = 0;
   2525   is_num = exist_level = 0;
   2526   sign = 1;
   2527   pnum_head = *src;
   2528 
   2529   end_code = get_name_end_code_point(start_code);
   2530 
   2531   name_end = end;
   2532   r = 0;
   2533   if (PEND) {
   2534     return ONIGERR_EMPTY_GROUP_NAME;
   2535   }
   2536   else {
   2537     PFETCH(c);
   2538     if (c == end_code)
   2539       return ONIGERR_EMPTY_GROUP_NAME;
   2540 
   2541     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
   2542       is_num = 1;
   2543     }
   2544     else if (c == '-') {
   2545       is_num = 2;
   2546       sign = -1;
   2547       pnum_head = p;
   2548     }
   2549     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
   2550       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   2551     }
   2552   }
   2553 
   2554   while (!PEND) {
   2555     name_end = p;
   2556     PFETCH(c);
   2557     if (c == end_code || c == ')' || c == '+' || c == '-') {
   2558       if (is_num == 2) 	r = ONIGERR_INVALID_GROUP_NAME;
   2559       break;
   2560     }
   2561 
   2562     if (is_num != 0) {
   2563       if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
   2564         is_num = 1;
   2565       }
   2566       else {
   2567         r = ONIGERR_INVALID_GROUP_NAME;
   2568         is_num = 0;
   2569       }
   2570     }
   2571     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
   2572       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   2573     }
   2574   }
   2575 
   2576   if (r == 0 && c != end_code) {
   2577     if (c == '+' || c == '-') {
   2578       int level;
   2579       int flag = (c == '-' ? -1 : 1);
   2580 
   2581       PFETCH(c);
   2582       if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
   2583       PUNFETCH;
   2584       level = onig_scan_unsigned_number(&p, end, enc);
   2585       if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
   2586       *rlevel = (level * flag);
   2587       exist_level = 1;
   2588 
   2589       PFETCH(c);
   2590       if (c == end_code)
   2591 	goto end;
   2592     }
   2593 
   2594   err:
   2595     r = ONIGERR_INVALID_GROUP_NAME;
   2596     name_end = end;
   2597   }
   2598 
   2599  end:
   2600   if (r == 0) {
   2601     if (is_num != 0) {
   2602       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
   2603       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
   2604       else if (*rback_num == 0) goto err;
   2605 
   2606       *rback_num *= sign;
   2607     }
   2608 
   2609     *rname_end = name_end;
   2610     *src = p;
   2611     return (exist_level ? 1 : 0);
   2612   }
   2613   else {
   2614     onig_scan_env_set_error_string(env, r, *src, name_end);
   2615     return r;
   2616   }
   2617 }
   2618 #endif /* USE_BACKREF_WITH_LEVEL */
   2619 
   2620 /*
   2621   def: 0 -> define name    (don't allow number name)
   2622        1 -> reference name (allow number name)
   2623 */
   2624 static int
   2625 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
   2626 	   UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
   2627 {
   2628   int r, is_num, sign;
   2629   OnigCodePoint end_code;
   2630   OnigCodePoint c = 0;
   2631   OnigEncoding enc = env->enc;
   2632   UChar *name_end;
   2633   UChar *pnum_head;
   2634   UChar *p = *src;
   2635 
   2636   *rback_num = 0;
   2637 
   2638   end_code = get_name_end_code_point(start_code);
   2639 
   2640   name_end = end;
   2641   pnum_head = *src;
   2642   r = 0;
   2643   is_num = 0;
   2644   sign = 1;
   2645   if (PEND) {
   2646     return ONIGERR_EMPTY_GROUP_NAME;
   2647   }
   2648   else {
   2649     PFETCH_S(c);
   2650     if (c == end_code)
   2651       return ONIGERR_EMPTY_GROUP_NAME;
   2652 
   2653     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
   2654       if (ref == 1)
   2655         is_num = 1;
   2656       else {
   2657         r = ONIGERR_INVALID_GROUP_NAME;
   2658         is_num = 0;
   2659       }
   2660     }
   2661     else if (c == '-') {
   2662       if (ref == 1) {
   2663         is_num = 2;
   2664         sign = -1;
   2665         pnum_head = p;
   2666       }
   2667       else {
   2668         r = ONIGERR_INVALID_GROUP_NAME;
   2669         is_num = 0;
   2670       }
   2671     }
   2672     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
   2673       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   2674     }
   2675   }
   2676 
   2677   if (r == 0) {
   2678     while (!PEND) {
   2679       name_end = p;
   2680       PFETCH_S(c);
   2681       if (c == end_code || c == ')') {
   2682         if (is_num == 2) 	r = ONIGERR_INVALID_GROUP_NAME;
   2683         break;
   2684       }
   2685 
   2686       if (is_num != 0) {
   2687         if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
   2688           is_num = 1;
   2689         }
   2690         else {
   2691           if (!ONIGENC_IS_CODE_WORD(enc, c))
   2692             r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   2693           else
   2694             r = ONIGERR_INVALID_GROUP_NAME;
   2695           is_num = 0;
   2696         }
   2697       }
   2698       else {
   2699         if (!ONIGENC_IS_CODE_WORD(enc, c)) {
   2700           r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   2701         }
   2702       }
   2703     }
   2704 
   2705     if (c != end_code) {
   2706       r = ONIGERR_INVALID_GROUP_NAME;
   2707       name_end = end;
   2708     }
   2709 
   2710     if (is_num != 0) {
   2711       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
   2712       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
   2713       else if (*rback_num == 0) {
   2714         r = ONIGERR_INVALID_GROUP_NAME;
   2715         goto err;
   2716       }
   2717 
   2718       *rback_num *= sign;
   2719     }
   2720 
   2721     *rname_end = name_end;
   2722     *src = p;
   2723     return 0;
   2724   }
   2725   else {
   2726     while (!PEND) {
   2727       name_end = p;
   2728       PFETCH_S(c);
   2729       if (c == end_code || c == ')')
   2730         break;
   2731     }
   2732     if (PEND)
   2733       name_end = end;
   2734 
   2735   err:
   2736     onig_scan_env_set_error_string(env, r, *src, name_end);
   2737     return r;
   2738   }
   2739 }
   2740 #else
   2741 static int
   2742 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
   2743 	   UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
   2744 {
   2745   int r, is_num, sign;
   2746   OnigCodePoint end_code;
   2747   OnigCodePoint c = 0;
   2748   UChar *name_end;
   2749   OnigEncoding enc = env->enc;
   2750   UChar *pnum_head;
   2751   UChar *p = *src;
   2752   PFETCH_READY;
   2753 
   2754   *rback_num = 0;
   2755 
   2756   end_code = get_name_end_code_point(start_code);
   2757 
   2758   *rname_end = name_end = end;
   2759   r = 0;
   2760   pnum_head = *src;
   2761   is_num = 0;
   2762   sign = 1;
   2763 
   2764   if (PEND) {
   2765     return ONIGERR_EMPTY_GROUP_NAME;
   2766   }
   2767   else {
   2768     PFETCH(c);
   2769     if (c == end_code)
   2770       return ONIGERR_EMPTY_GROUP_NAME;
   2771 
   2772     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
   2773       is_num = 1;
   2774     }
   2775     else if (c == '-') {
   2776       is_num = 2;
   2777       sign = -1;
   2778       pnum_head = p;
   2779     }
   2780     else {
   2781       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   2782     }
   2783   }
   2784 
   2785   while (!PEND) {
   2786     name_end = p;
   2787 
   2788     PFETCH(c);
   2789     if (c == end_code || c == ')') break;
   2790     if (! ONIGENC_IS_CODE_DIGIT(enc, c))
   2791       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   2792   }
   2793   if (r == 0 && c != end_code) {
   2794     r = ONIGERR_INVALID_GROUP_NAME;
   2795     name_end = end;
   2796   }
   2797 
   2798   if (r == 0) {
   2799     *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
   2800     if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
   2801     else if (*rback_num == 0) {
   2802       r = ONIGERR_INVALID_GROUP_NAME;
   2803       goto err;
   2804     }
   2805     *rback_num *= sign;
   2806 
   2807     *rname_end = name_end;
   2808     *src = p;
   2809     return 0;
   2810   }
   2811   else {
   2812   err:
   2813     onig_scan_env_set_error_string(env, r, *src, name_end);
   2814     return r;
   2815   }
   2816 }
   2817 #endif /* USE_NAMED_GROUP */
   2818 
   2819 static void
   2820 CC_ESC_WARN(ScanEnv* env, UChar *c)
   2821 {
   2822   if (onig_warn == onig_null_warn) return ;
   2823 
   2824   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
   2825       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
   2826     UChar buf[WARN_BUFSIZE];
   2827     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
   2828 		env->pattern, env->pattern_end,
   2829                 (UChar* )"character class has '%s' without escape", c);
   2830     (*onig_warn)((char* )buf);
   2831   }
   2832 }
   2833 
   2834 static void
   2835 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
   2836 {
   2837   if (onig_warn == onig_null_warn) return ;
   2838 
   2839   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
   2840     UChar buf[WARN_BUFSIZE];
   2841     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
   2842 		(env)->pattern, (env)->pattern_end,
   2843 		(UChar* )"regular expression has '%s' without escape", c);
   2844     (*onig_warn)((char* )buf);
   2845   }
   2846 }
   2847 
   2848 static UChar*
   2849 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
   2850 		  UChar **next, OnigEncoding enc)
   2851 {
   2852   int i;
   2853   OnigCodePoint x;
   2854   UChar *q;
   2855   UChar *p = from;
   2856 
   2857   while (p < to) {
   2858     x = ONIGENC_MBC_TO_CODE(enc, p, to);
   2859     q = p + enclen(enc, p);
   2860     if (x == s[0]) {
   2861       for (i = 1; i < n && q < to; i++) {
   2862 	x = ONIGENC_MBC_TO_CODE(enc, q, to);
   2863 	if (x != s[i]) break;
   2864 	q += enclen(enc, q);
   2865       }
   2866       if (i >= n) {
   2867 	if (IS_NOT_NULL(next))
   2868 	  *next = q;
   2869 	return p;
   2870       }
   2871     }
   2872     p = q;
   2873   }
   2874   return NULL_UCHARP;
   2875 }
   2876 
   2877 static int
   2878 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
   2879 		 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
   2880 {
   2881   int i, in_esc;
   2882   OnigCodePoint x;
   2883   UChar *q;
   2884   UChar *p = from;
   2885 
   2886   in_esc = 0;
   2887   while (p < to) {
   2888     if (in_esc) {
   2889       in_esc = 0;
   2890       p += enclen(enc, p);
   2891     }
   2892     else {
   2893       x = ONIGENC_MBC_TO_CODE(enc, p, to);
   2894       q = p + enclen(enc, p);
   2895       if (x == s[0]) {
   2896 	for (i = 1; i < n && q < to; i++) {
   2897 	  x = ONIGENC_MBC_TO_CODE(enc, q, to);
   2898 	  if (x != s[i]) break;
   2899 	  q += enclen(enc, q);
   2900 	}
   2901 	if (i >= n) return 1;
   2902 	p += enclen(enc, p);
   2903       }
   2904       else {
   2905 	x = ONIGENC_MBC_TO_CODE(enc, p, to);
   2906 	if (x == bad) return 0;
   2907 	else if (x == MC_ESC(syn)) in_esc = 1;
   2908 	p = q;
   2909       }
   2910     }
   2911   }
   2912   return 0;
   2913 }
   2914 
   2915 static int
   2916 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
   2917 {
   2918   int num;
   2919   OnigCodePoint c, c2;
   2920   OnigSyntaxType* syn = env->syntax;
   2921   OnigEncoding enc = env->enc;
   2922   UChar* prev;
   2923   UChar* p = *src;
   2924   PFETCH_READY;
   2925 
   2926   if (PEND) {
   2927     tok->type = TK_EOT;
   2928     return tok->type;
   2929   }
   2930 
   2931   PFETCH(c);
   2932   tok->type = TK_CHAR;
   2933   tok->base = 0;
   2934   tok->u.c  = c;
   2935   tok->escaped = 0;
   2936 
   2937   if (c == ']') {
   2938     tok->type = TK_CC_CLOSE;
   2939   }
   2940   else if (c == '-') {
   2941     tok->type = TK_CC_RANGE;
   2942   }
   2943   else if (c == MC_ESC(syn)) {
   2944     if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
   2945       goto end;
   2946 
   2947     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
   2948 
   2949     PFETCH(c);
   2950     tok->escaped = 1;
   2951     tok->u.c = c;
   2952     switch (c) {
   2953     case 'w':
   2954       tok->type = TK_CHAR_TYPE;
   2955       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
   2956       tok->u.prop.not   = 0;
   2957       break;
   2958     case 'W':
   2959       tok->type = TK_CHAR_TYPE;
   2960       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
   2961       tok->u.prop.not   = 1;
   2962       break;
   2963     case 'd':
   2964       tok->type = TK_CHAR_TYPE;
   2965       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
   2966       tok->u.prop.not   = 0;
   2967       break;
   2968     case 'D':
   2969       tok->type = TK_CHAR_TYPE;
   2970       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
   2971       tok->u.prop.not   = 1;
   2972       break;
   2973     case 's':
   2974       tok->type = TK_CHAR_TYPE;
   2975       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
   2976       tok->u.prop.not   = 0;
   2977       break;
   2978     case 'S':
   2979       tok->type = TK_CHAR_TYPE;
   2980       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
   2981       tok->u.prop.not   = 1;
   2982       break;
   2983     case 'h':
   2984       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
   2985       tok->type = TK_CHAR_TYPE;
   2986       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
   2987       tok->u.prop.not   = 0;
   2988       break;
   2989     case 'H':
   2990       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
   2991       tok->type = TK_CHAR_TYPE;
   2992       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
   2993       tok->u.prop.not   = 1;
   2994       break;
   2995 
   2996     case 'p':
   2997     case 'P':
   2998       c2 = PPEEK;
   2999       if (c2 == '{' &&
   3000 	  IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
   3001 	PINC;
   3002 	tok->type = TK_CHAR_PROPERTY;
   3003 	tok->u.prop.not = (c == 'P' ? 1 : 0);
   3004 
   3005 	if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
   3006 	  PFETCH(c2);
   3007 	  if (c2 == '^') {
   3008 	    tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
   3009 	  }
   3010 	  else
   3011 	    PUNFETCH;
   3012 	}
   3013       }
   3014       break;
   3015 
   3016     case 'x':
   3017       if (PEND) break;
   3018 
   3019       prev = p;
   3020       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
   3021 	PINC;
   3022 	num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
   3023 	if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
   3024 	if (!PEND) {
   3025           c2 = PPEEK;
   3026           if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
   3027             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
   3028         }
   3029 
   3030 	if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
   3031 	  PINC;
   3032 	  tok->type   = TK_CODE_POINT;
   3033 	  tok->base   = 16;
   3034 	  tok->u.code = (OnigCodePoint )num;
   3035 	}
   3036 	else {
   3037 	  /* can't read nothing or invalid format */
   3038 	  p = prev;
   3039 	}
   3040       }
   3041       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
   3042 	num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
   3043 	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
   3044 	if (p == prev) {  /* can't read nothing. */
   3045 	  num = 0; /* but, it's not error */
   3046 	}
   3047 	tok->type = TK_RAW_BYTE;
   3048 	tok->base = 16;
   3049 	tok->u.c  = num;
   3050       }
   3051       break;
   3052 
   3053     case 'u':
   3054       if (PEND) break;
   3055 
   3056       prev = p;
   3057       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
   3058 	num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
   3059 	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
   3060 	if (p == prev) {  /* can't read nothing. */
   3061 	  num = 0; /* but, it's not error */
   3062 	}
   3063 	tok->type   = TK_CODE_POINT;
   3064 	tok->base   = 16;
   3065 	tok->u.code = (OnigCodePoint )num;
   3066       }
   3067       break;
   3068 
   3069     case '0':
   3070     case '1': case '2': case '3': case '4': case '5': case '6': case '7':
   3071       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
   3072 	PUNFETCH;
   3073 	prev = p;
   3074 	num = scan_unsigned_octal_number(&p, end, 3, enc);
   3075 	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
   3076 	if (p == prev) {  /* can't read nothing. */
   3077 	  num = 0; /* but, it's not error */
   3078 	}
   3079 	tok->type = TK_RAW_BYTE;
   3080 	tok->base = 8;
   3081 	tok->u.c  = num;
   3082       }
   3083       break;
   3084 
   3085     default:
   3086       PUNFETCH;
   3087       num = fetch_escaped_value(&p, end, env);
   3088       if (num < 0) return num;
   3089       if (tok->u.c != num) {
   3090 	tok->u.code = (OnigCodePoint )num;
   3091 	tok->type   = TK_CODE_POINT;
   3092       }
   3093       break;
   3094     }
   3095   }
   3096   else if (c == '[') {
   3097     if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
   3098       OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
   3099       tok->backp = p; /* point at '[' is readed */
   3100       PINC;
   3101       if (str_exist_check_with_esc(send, 2, p, end,
   3102                                    (OnigCodePoint )']', enc, syn)) {
   3103 	tok->type = TK_POSIX_BRACKET_OPEN;
   3104       }
   3105       else {
   3106 	PUNFETCH;
   3107 	goto cc_in_cc;
   3108       }
   3109     }
   3110     else {
   3111     cc_in_cc:
   3112       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
   3113 	tok->type = TK_CC_CC_OPEN;
   3114       }
   3115       else {
   3116 	CC_ESC_WARN(env, (UChar* )"[");
   3117       }
   3118     }
   3119   }
   3120   else if (c == '&') {
   3121     if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
   3122 	!PEND && (PPEEK_IS('&'))) {
   3123       PINC;
   3124       tok->type = TK_CC_AND;
   3125     }
   3126   }
   3127 
   3128  end:
   3129   *src = p;
   3130   return tok->type;
   3131 }
   3132 
   3133 static int
   3134 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
   3135 {
   3136   int r, num;
   3137   OnigCodePoint c;
   3138   OnigEncoding enc = env->enc;
   3139   OnigSyntaxType* syn = env->syntax;
   3140   UChar* prev;
   3141   UChar* p = *src;
   3142   PFETCH_READY;
   3143 
   3144  start:
   3145   if (PEND) {
   3146     tok->type = TK_EOT;
   3147     return tok->type;
   3148   }
   3149 
   3150   tok->type  = TK_STRING;
   3151   tok->base  = 0;
   3152   tok->backp = p;
   3153 
   3154   PFETCH(c);
   3155   if (IS_MC_ESC_CODE(c, syn)) {
   3156     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
   3157 
   3158     tok->backp = p;
   3159     PFETCH(c);
   3160 
   3161     tok->u.c = c;
   3162     tok->escaped = 1;
   3163     switch (c) {
   3164     case '*':
   3165       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
   3166       tok->type = TK_OP_REPEAT;
   3167       tok->u.repeat.lower = 0;
   3168       tok->u.repeat.upper = REPEAT_INFINITE;
   3169       goto greedy_check;
   3170       break;
   3171 
   3172     case '+':
   3173       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
   3174       tok->type = TK_OP_REPEAT;
   3175       tok->u.repeat.lower = 1;
   3176       tok->u.repeat.upper = REPEAT_INFINITE;
   3177       goto greedy_check;
   3178       break;
   3179 
   3180     case '?':
   3181       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
   3182       tok->type = TK_OP_REPEAT;
   3183       tok->u.repeat.lower = 0;
   3184       tok->u.repeat.upper = 1;
   3185     greedy_check:
   3186       if (!PEND && PPEEK_IS('?') &&
   3187 	  IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
   3188 	PFETCH(c);
   3189 	tok->u.repeat.greedy     = 0;
   3190 	tok->u.repeat.possessive = 0;
   3191       }
   3192       else {
   3193       possessive_check:
   3194 	if (!PEND && PPEEK_IS('+') &&
   3195 	    ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
   3196 	      tok->type != TK_INTERVAL)  ||
   3197 	     (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
   3198 	      tok->type == TK_INTERVAL))) {
   3199 	  PFETCH(c);
   3200 	  tok->u.repeat.greedy     = 1;
   3201 	  tok->u.repeat.possessive = 1;
   3202 	}
   3203 	else {
   3204 	  tok->u.repeat.greedy     = 1;
   3205 	  tok->u.repeat.possessive = 0;
   3206 	}
   3207       }
   3208       break;
   3209 
   3210     case '{':
   3211       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
   3212       r = fetch_range_quantifier(&p, end, tok, env);
   3213       if (r < 0) return r;  /* error */
   3214       if (r == 0) goto greedy_check;
   3215       else if (r == 2) { /* {n} */
   3216 	if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
   3217 	  goto possessive_check;
   3218 
   3219 	goto greedy_check;
   3220       }
   3221       /* r == 1 : normal char */
   3222       break;
   3223 
   3224     case '|':
   3225       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
   3226       tok->type = TK_ALT;
   3227       break;
   3228 
   3229     case '(':
   3230       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
   3231       tok->type = TK_SUBEXP_OPEN;
   3232       break;
   3233 
   3234     case ')':
   3235       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
   3236       tok->type = TK_SUBEXP_CLOSE;
   3237       break;
   3238 
   3239     case 'w':
   3240       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
   3241       tok->type = TK_CHAR_TYPE;
   3242       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
   3243       tok->u.prop.not   = 0;
   3244       break;
   3245 
   3246     case 'W':
   3247       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
   3248       tok->type = TK_CHAR_TYPE;
   3249       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
   3250       tok->u.prop.not   = 1;
   3251       break;
   3252 
   3253     case 'b':
   3254       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
   3255       tok->type = TK_ANCHOR;
   3256       tok->u.anchor = ANCHOR_WORD_BOUND;
   3257       break;
   3258 
   3259     case 'B':
   3260       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
   3261       tok->type = TK_ANCHOR;
   3262       tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
   3263       break;
   3264 
   3265 #ifdef USE_WORD_BEGIN_END
   3266     case '<':
   3267       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
   3268       tok->type = TK_ANCHOR;
   3269       tok->u.anchor = ANCHOR_WORD_BEGIN;
   3270       break;
   3271 
   3272     case '>':
   3273       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
   3274       tok->type = TK_ANCHOR;
   3275       tok->u.anchor = ANCHOR_WORD_END;
   3276       break;
   3277 #endif
   3278 
   3279     case 's':
   3280       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
   3281       tok->type = TK_CHAR_TYPE;
   3282       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
   3283       tok->u.prop.not   = 0;
   3284       break;
   3285 
   3286     case 'S':
   3287       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
   3288       tok->type = TK_CHAR_TYPE;
   3289       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
   3290       tok->u.prop.not   = 1;
   3291       break;
   3292 
   3293     case 'd':
   3294       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
   3295       tok->type = TK_CHAR_TYPE;
   3296       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
   3297       tok->u.prop.not   = 0;
   3298       break;
   3299 
   3300     case 'D':
   3301       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
   3302       tok->type = TK_CHAR_TYPE;
   3303       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
   3304       tok->u.prop.not   = 1;
   3305       break;
   3306 
   3307     case 'h':
   3308       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
   3309       tok->type = TK_CHAR_TYPE;
   3310       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
   3311       tok->u.prop.not   = 0;
   3312       break;
   3313 
   3314     case 'H':
   3315       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
   3316       tok->type = TK_CHAR_TYPE;
   3317       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
   3318       tok->u.prop.not   = 1;
   3319       break;
   3320 
   3321     case 'A':
   3322       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
   3323     begin_buf:
   3324       tok->type = TK_ANCHOR;
   3325       tok->u.subtype = ANCHOR_BEGIN_BUF;
   3326       break;
   3327 
   3328     case 'Z':
   3329       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
   3330       tok->type = TK_ANCHOR;
   3331       tok->u.subtype = ANCHOR_SEMI_END_BUF;
   3332       break;
   3333 
   3334     case 'z':
   3335       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
   3336     end_buf:
   3337       tok->type = TK_ANCHOR;
   3338       tok->u.subtype = ANCHOR_END_BUF;
   3339       break;
   3340 
   3341     case 'G':
   3342       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
   3343       tok->type = TK_ANCHOR;
   3344       tok->u.subtype = ANCHOR_BEGIN_POSITION;
   3345       break;
   3346 
   3347     case '`':
   3348       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
   3349       goto begin_buf;
   3350       break;
   3351 
   3352     case '\'':
   3353       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
   3354       goto end_buf;
   3355       break;
   3356 
   3357     case 'x':
   3358       if (PEND) break;
   3359 
   3360       prev = p;
   3361       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
   3362 	PINC;
   3363 	num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
   3364 	if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
   3365 	if (!PEND) {
   3366           if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
   3367             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
   3368         }
   3369 
   3370 	if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
   3371 	  PINC;
   3372 	  tok->type   = TK_CODE_POINT;
   3373 	  tok->u.code = (OnigCodePoint )num;
   3374 	}
   3375 	else {
   3376 	  /* can't read nothing or invalid format */
   3377 	  p = prev;
   3378 	}
   3379       }
   3380       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
   3381 	num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
   3382 	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
   3383 	if (p == prev) {  /* can't read nothing. */
   3384 	  num = 0; /* but, it's not error */
   3385 	}
   3386 	tok->type = TK_RAW_BYTE;
   3387 	tok->base = 16;
   3388 	tok->u.c  = num;
   3389       }
   3390       break;
   3391 
   3392     case 'u':
   3393       if (PEND) break;
   3394 
   3395       prev = p;
   3396       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
   3397 	num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
   3398 	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
   3399 	if (p == prev) {  /* can't read nothing. */
   3400 	  num = 0; /* but, it's not error */
   3401 	}
   3402 	tok->type   = TK_CODE_POINT;
   3403 	tok->base   = 16;
   3404 	tok->u.code = (OnigCodePoint )num;
   3405       }
   3406       break;
   3407 
   3408     case '1': case '2': case '3': case '4':
   3409     case '5': case '6': case '7': case '8': case '9':
   3410       PUNFETCH;
   3411       prev = p;
   3412       num = onig_scan_unsigned_number(&p, end, enc);
   3413       if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
   3414         goto skip_backref;
   3415       }
   3416 
   3417       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
   3418 	  (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
   3419 	if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
   3420 	  if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
   3421 	    return ONIGERR_INVALID_BACKREF;
   3422 	}
   3423 
   3424 	tok->type = TK_BACKREF;
   3425 	tok->u.backref.num     = 1;
   3426 	tok->u.backref.ref1    = num;
   3427 	tok->u.backref.by_name = 0;
   3428 #ifdef USE_BACKREF_WITH_LEVEL
   3429 	tok->u.backref.exist_level = 0;
   3430 #endif
   3431 	break;
   3432       }
   3433 
   3434     skip_backref:
   3435       if (c == '8' || c == '9') {
   3436 	/* normal char */
   3437 	p = prev; PINC;
   3438 	break;
   3439       }
   3440 
   3441       p = prev;
   3442       /* fall through */
   3443     case '0':
   3444       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
   3445 	prev = p;
   3446 	num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
   3447 	if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
   3448 	if (p == prev) {  /* can't read nothing. */
   3449 	  num = 0; /* but, it's not error */
   3450 	}
   3451 	tok->type = TK_RAW_BYTE;
   3452 	tok->base = 8;
   3453 	tok->u.c  = num;
   3454       }
   3455       else if (c != '0') {
   3456 	PINC;
   3457       }
   3458       break;
   3459 
   3460 #ifdef USE_NAMED_GROUP
   3461     case 'k':
   3462       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
   3463 	PFETCH(c);
   3464 	if (c == '<' || c == '\'') {
   3465 	  UChar* name_end;
   3466 	  int* backs;
   3467 	  int back_num;
   3468 
   3469 	  prev = p;
   3470 
   3471 #ifdef USE_BACKREF_WITH_LEVEL
   3472 	  name_end = NULL_UCHARP; /* no need. escape gcc warning. */
   3473 	  r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
   3474 				    env, &back_num, &tok->u.backref.level);
   3475 	  if (r == 1) tok->u.backref.exist_level = 1;
   3476 	  else        tok->u.backref.exist_level = 0;
   3477 #else
   3478 	  r = fetch_name(&p, end, &name_end, env, &back_num, 1);
   3479 #endif
   3480 	  if (r < 0) return r;
   3481 
   3482 	  if (back_num != 0) {
   3483 	    if (back_num < 0) {
   3484 	      back_num = BACKREF_REL_TO_ABS(back_num, env);
   3485 	      if (back_num <= 0)
   3486 		return ONIGERR_INVALID_BACKREF;
   3487 	    }
   3488 
   3489 	    if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
   3490 	      if (back_num > env->num_mem ||
   3491 		  IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
   3492 		return ONIGERR_INVALID_BACKREF;
   3493 	    }
   3494 	    tok->type = TK_BACKREF;
   3495 	    tok->u.backref.by_name = 0;
   3496 	    tok->u.backref.num  = 1;
   3497 	    tok->u.backref.ref1 = back_num;
   3498 	  }
   3499 	  else {
   3500 	    num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
   3501 	    if (num <= 0) {
   3502 	      onig_scan_env_set_error_string(env,
   3503 			     ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
   3504 	      return ONIGERR_UNDEFINED_NAME_REFERENCE;
   3505 	    }
   3506 	    if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
   3507 	      int i;
   3508 	      for (i = 0; i < num; i++) {
   3509 		if (backs[i] > env->num_mem ||
   3510 		    IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
   3511 		  return ONIGERR_INVALID_BACKREF;
   3512 	      }
   3513 	    }
   3514 
   3515 	    tok->type = TK_BACKREF;
   3516 	    tok->u.backref.by_name = 1;
   3517 	    if (num == 1) {
   3518 	      tok->u.backref.num  = 1;
   3519 	      tok->u.backref.ref1 = backs[0];
   3520 	    }
   3521 	    else {
   3522 	      tok->u.backref.num  = num;
   3523 	      tok->u.backref.refs = backs;
   3524 	    }
   3525 	  }
   3526 	}
   3527 	else
   3528 	  PUNFETCH;
   3529       }
   3530       break;
   3531 #endif
   3532 
   3533 #ifdef USE_SUBEXP_CALL
   3534     case 'g':
   3535       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
   3536 	PFETCH(c);
   3537 	if (c == '<' || c == '\'') {
   3538 	  int gnum;
   3539 	  UChar* name_end;
   3540 
   3541 	  prev = p;
   3542 	  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
   3543 	  if (r < 0) return r;
   3544 
   3545 	  tok->type = TK_CALL;
   3546 	  tok->u.call.name     = prev;
   3547 	  tok->u.call.name_end = name_end;
   3548 	  tok->u.call.gnum     = gnum;
   3549 	}
   3550 	else
   3551 	  PUNFETCH;
   3552       }
   3553       break;
   3554 #endif
   3555 
   3556     case 'Q':
   3557       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
   3558 	tok->type = TK_QUOTE_OPEN;
   3559       }
   3560       break;
   3561 
   3562     case 'p':
   3563     case 'P':
   3564       if (PPEEK_IS('{') &&
   3565 	  IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
   3566 	PINC;
   3567 	tok->type = TK_CHAR_PROPERTY;
   3568 	tok->u.prop.not = (c == 'P' ? 1 : 0);
   3569 
   3570 	if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
   3571 	  PFETCH(c);
   3572 	  if (c == '^') {
   3573 	    tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
   3574 	  }
   3575 	  else
   3576 	    PUNFETCH;
   3577 	}
   3578       }
   3579       break;
   3580 
   3581     default:
   3582       PUNFETCH;
   3583       num = fetch_escaped_value(&p, end, env);
   3584       if (num < 0) return num;
   3585       /* set_raw: */
   3586       if (tok->u.c != num) {
   3587 	tok->type = TK_CODE_POINT;
   3588 	tok->u.code = (OnigCodePoint )num;
   3589       }
   3590       else { /* string */
   3591 	p = tok->backp + enclen(enc, tok->backp);
   3592       }
   3593       break;
   3594     }
   3595   }
   3596   else {
   3597     tok->u.c = c;
   3598     tok->escaped = 0;
   3599 
   3600 #ifdef USE_VARIABLE_META_CHARS
   3601     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
   3602 	IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
   3603       if (c == MC_ANYCHAR(syn))
   3604 	goto any_char;
   3605       else if (c == MC_ANYTIME(syn))
   3606 	goto anytime;
   3607       else if (c == MC_ZERO_OR_ONE_TIME(syn))
   3608 	goto zero_or_one_time;
   3609       else if (c == MC_ONE_OR_MORE_TIME(syn))
   3610 	goto one_or_more_time;
   3611       else if (c == MC_ANYCHAR_ANYTIME(syn)) {
   3612 	tok->type = TK_ANYCHAR_ANYTIME;
   3613 	goto out;
   3614       }
   3615     }
   3616 #endif
   3617 
   3618     switch (c) {
   3619     case '.':
   3620       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
   3621 #ifdef USE_VARIABLE_META_CHARS
   3622     any_char:
   3623 #endif
   3624       tok->type = TK_ANYCHAR;
   3625       break;
   3626 
   3627     case '*':
   3628       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
   3629 #ifdef USE_VARIABLE_META_CHARS
   3630     anytime:
   3631 #endif
   3632       tok->type = TK_OP_REPEAT;
   3633       tok->u.repeat.lower = 0;
   3634       tok->u.repeat.upper = REPEAT_INFINITE;
   3635       goto greedy_check;
   3636       break;
   3637 
   3638     case '+':
   3639       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
   3640 #ifdef USE_VARIABLE_META_CHARS
   3641     one_or_more_time:
   3642 #endif
   3643       tok->type = TK_OP_REPEAT;
   3644       tok->u.repeat.lower = 1;
   3645       tok->u.repeat.upper = REPEAT_INFINITE;
   3646       goto greedy_check;
   3647       break;
   3648 
   3649     case '?':
   3650       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
   3651 #ifdef USE_VARIABLE_META_CHARS
   3652     zero_or_one_time:
   3653 #endif
   3654       tok->type = TK_OP_REPEAT;
   3655       tok->u.repeat.lower = 0;
   3656       tok->u.repeat.upper = 1;
   3657       goto greedy_check;
   3658       break;
   3659 
   3660     case '{':
   3661       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
   3662       r = fetch_range_quantifier(&p, end, tok, env);
   3663       if (r < 0) return r;  /* error */
   3664       if (r == 0) goto greedy_check;
   3665       else if (r == 2) { /* {n} */
   3666 	if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
   3667 	  goto possessive_check;
   3668 
   3669 	goto greedy_check;
   3670       }
   3671       /* r == 1 : normal char */
   3672       break;
   3673 
   3674     case '|':
   3675       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
   3676       tok->type = TK_ALT;
   3677       break;
   3678 
   3679     case '(':
   3680       if (PPEEK_IS('?') &&
   3681           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
   3682         PINC;
   3683         if (PPEEK_IS('#')) {
   3684           PFETCH(c);
   3685           while (1) {
   3686             if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
   3687             PFETCH(c);
   3688             if (c == MC_ESC(syn)) {
   3689               if (!PEND) PFETCH(c);
   3690             }
   3691             else {
   3692               if (c == ')') break;
   3693             }
   3694           }
   3695           goto start;
   3696         }
   3697         PUNFETCH;
   3698       }
   3699 
   3700       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
   3701       tok->type = TK_SUBEXP_OPEN;
   3702       break;
   3703 
   3704     case ')':
   3705       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
   3706       tok->type = TK_SUBEXP_CLOSE;
   3707       break;
   3708 
   3709     case '^':
   3710       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
   3711       tok->type = TK_ANCHOR;
   3712       tok->u.subtype = (IS_SINGLELINE(env->option)
   3713 			? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
   3714       break;
   3715 
   3716     case '$':
   3717       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
   3718       tok->type = TK_ANCHOR;
   3719       tok->u.subtype = (IS_SINGLELINE(env->option)
   3720 			? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
   3721       break;
   3722 
   3723     case '[':
   3724       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
   3725       tok->type = TK_CC_OPEN;
   3726       break;
   3727 
   3728     case ']':
   3729       if (*src > env->pattern)   /* /].../ is allowed. */
   3730 	CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
   3731       break;
   3732 
   3733     case '#':
   3734       if (IS_EXTEND(env->option)) {
   3735 	while (!PEND) {
   3736 	  PFETCH(c);
   3737 	  if (ONIGENC_IS_CODE_NEWLINE(enc, c))
   3738 	    break;
   3739 	}
   3740 	goto start;
   3741 	break;
   3742       }
   3743       break;
   3744 
   3745     case ' ': case '\t': case '\n': case '\r': case '\f':
   3746       if (IS_EXTEND(env->option))
   3747 	goto start;
   3748       break;
   3749 
   3750     default:
   3751       /* string */
   3752       break;
   3753     }
   3754   }
   3755 
   3756 #ifdef USE_VARIABLE_META_CHARS
   3757  out:
   3758 #endif
   3759   *src = p;
   3760   return tok->type;
   3761 }
   3762 
   3763 static int
   3764 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
   3765 			 OnigEncoding enc ARG_UNUSED,
   3766                          OnigCodePoint sb_out, const OnigCodePoint mbr[])
   3767 {
   3768   int i, r;
   3769   OnigCodePoint j;
   3770 
   3771   int n = ONIGENC_CODE_RANGE_NUM(mbr);
   3772 
   3773   if (not == 0) {
   3774     for (i = 0; i < n; i++) {
   3775       for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
   3776            j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
   3777 	if (j >= sb_out) {
   3778 	  if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
   3779 	  else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
   3780 	    r = add_code_range_to_buf(&(cc->mbuf), j,
   3781 				      ONIGENC_CODE_RANGE_TO(mbr, i));
   3782 	    if (r != 0) return r;
   3783 	    i++;
   3784 	  }
   3785 
   3786 	  goto sb_end;
   3787 	}
   3788         BITSET_SET_BIT(cc->bs, j);
   3789       }
   3790     }
   3791 
   3792   sb_end:
   3793     for ( ; i < n; i++) {
   3794       r = add_code_range_to_buf(&(cc->mbuf),
   3795                                 ONIGENC_CODE_RANGE_FROM(mbr, i),
   3796                                 ONIGENC_CODE_RANGE_TO(mbr, i));
   3797       if (r != 0) return r;
   3798     }
   3799   }
   3800   else {
   3801     OnigCodePoint prev = 0;
   3802 
   3803     for (i = 0; i < n; i++) {
   3804       for (j = prev;
   3805 	   j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
   3806 	if (j >= sb_out) {
   3807 	  goto sb_end2;
   3808 	}
   3809 	BITSET_SET_BIT(cc->bs, j);
   3810       }
   3811       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
   3812     }
   3813     for (j = prev; j < sb_out; j++) {
   3814       BITSET_SET_BIT(cc->bs, j);
   3815     }
   3816 
   3817   sb_end2:
   3818     prev = sb_out;
   3819 
   3820     for (i = 0; i < n; i++) {
   3821       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
   3822 	r = add_code_range_to_buf(&(cc->mbuf), prev,
   3823                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
   3824 	if (r != 0) return r;
   3825       }
   3826       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
   3827     }
   3828     if (prev < 0x7fffffff) {
   3829       r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
   3830       if (r != 0) return r;
   3831     }
   3832   }
   3833 
   3834   return 0;
   3835 }
   3836 
   3837 static int
   3838 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
   3839 {
   3840   int c, r;
   3841   const OnigCodePoint *ranges;
   3842   OnigCodePoint sb_out;
   3843   OnigEncoding enc = env->enc;
   3844 
   3845   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
   3846   if (r == 0) {
   3847     return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
   3848   }
   3849   else if (r != ONIG_NO_SUPPORT_CONFIG) {
   3850     return r;
   3851   }
   3852 
   3853   r = 0;
   3854   switch (ctype) {
   3855   case ONIGENC_CTYPE_ALPHA:
   3856   case ONIGENC_CTYPE_BLANK:
   3857   case ONIGENC_CTYPE_CNTRL:
   3858   case ONIGENC_CTYPE_DIGIT:
   3859   case ONIGENC_CTYPE_LOWER:
   3860   case ONIGENC_CTYPE_PUNCT:
   3861   case ONIGENC_CTYPE_SPACE:
   3862   case ONIGENC_CTYPE_UPPER:
   3863   case ONIGENC_CTYPE_XDIGIT:
   3864   case ONIGENC_CTYPE_ASCII:
   3865   case ONIGENC_CTYPE_ALNUM:
   3866     if (not != 0) {
   3867       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
   3868 	if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
   3869 	  BITSET_SET_BIT(cc->bs, c);
   3870       }
   3871       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
   3872     }
   3873     else {
   3874       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
   3875 	if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
   3876 	  BITSET_SET_BIT(cc->bs, c);
   3877       }
   3878     }
   3879     break;
   3880 
   3881   case ONIGENC_CTYPE_GRAPH:
   3882   case ONIGENC_CTYPE_PRINT:
   3883     if (not != 0) {
   3884       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
   3885 	if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
   3886 	  BITSET_SET_BIT(cc->bs, c);
   3887       }
   3888     }
   3889     else {
   3890       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
   3891 	if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
   3892 	  BITSET_SET_BIT(cc->bs, c);
   3893       }
   3894       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
   3895     }
   3896     break;
   3897 
   3898   case ONIGENC_CTYPE_WORD:
   3899     if (not == 0) {
   3900       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
   3901 	if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
   3902       }
   3903       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
   3904     }
   3905     else {
   3906       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
   3907         if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
   3908 	    && ! ONIGENC_IS_CODE_WORD(enc, c))
   3909 	  BITSET_SET_BIT(cc->bs, c);
   3910       }
   3911     }
   3912     break;
   3913 
   3914   default:
   3915     return ONIGERR_PARSER_BUG;
   3916     break;
   3917   }
   3918 
   3919   return r;
   3920 }
   3921 
   3922 static int
   3923 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
   3924 {
   3925 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
   3926 #define POSIX_BRACKET_NAME_MIN_LEN         4
   3927 
   3928   static PosixBracketEntryType PBS[] = {
   3929     { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
   3930     { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
   3931     { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
   3932     { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
   3933     { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
   3934     { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
   3935     { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
   3936     { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
   3937     { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
   3938     { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
   3939     { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
   3940     { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
   3941     { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
   3942     { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
   3943     { (UChar* )NULL,     -1, 0 }
   3944   };
   3945 
   3946   PosixBracketEntryType *pb;
   3947   int not, i, r;
   3948   OnigCodePoint c;
   3949   OnigEncoding enc = env->enc;
   3950   UChar *p = *src;
   3951 
   3952   if (PPEEK_IS('^')) {
   3953     PINC_S;
   3954     not = 1;
   3955   }
   3956   else
   3957     not = 0;
   3958 
   3959   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
   3960     goto not_posix_bracket;
   3961 
   3962   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
   3963     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
   3964       p = (UChar* )onigenc_step(enc, p, end, pb->len);
   3965       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
   3966         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
   3967 
   3968       r = add_ctype_to_cc(cc, pb->ctype, not, env);
   3969       if (r != 0) return r;
   3970 
   3971       PINC_S; PINC_S;
   3972       *src = p;
   3973       return 0;
   3974     }
   3975   }
   3976 
   3977  not_posix_bracket:
   3978   c = 0;
   3979   i = 0;
   3980   while (!PEND && ((c = PPEEK) != ':') && c != ']') {
   3981     PINC_S;
   3982     if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
   3983   }
   3984   if (c == ':' && ! PEND) {
   3985     PINC_S;
   3986     if (! PEND) {
   3987       PFETCH_S(c);
   3988       if (c == ']')
   3989         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
   3990     }
   3991   }
   3992 
   3993   return 1;  /* 1: is not POSIX bracket, but no error. */
   3994 }
   3995 
   3996 static int
   3997 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
   3998 {
   3999   int r;
   4000   OnigCodePoint c;
   4001   OnigEncoding enc = env->enc;
   4002   UChar *prev, *start, *p = *src;
   4003 
   4004   r = 0;
   4005   start = prev = p;
   4006 
   4007   while (!PEND) {
   4008     prev = p;
   4009     PFETCH_S(c);
   4010     if (c == '}') {
   4011       r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
   4012       if (r < 0) break;
   4013 
   4014       *src = p;
   4015       return r;
   4016     }
   4017     else if (c == '(' || c == ')' || c == '{' || c == '|') {
   4018       r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
   4019       break;
   4020     }
   4021   }
   4022 
   4023   onig_scan_env_set_error_string(env, r, *src, prev);
   4024   return r;
   4025 }
   4026 
   4027 static int
   4028 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
   4029 		    ScanEnv* env)
   4030 {
   4031   int r, ctype;
   4032   CClassNode* cc;
   4033 
   4034   ctype = fetch_char_property_to_ctype(src, end, env);
   4035   if (ctype < 0) return ctype;
   4036 
   4037   *np = node_new_cclass();
   4038   CHECK_NULL_RETURN_MEMERR(*np);
   4039   cc = NCCLASS(*np);
   4040   r = add_ctype_to_cc(cc, ctype, 0, env);
   4041   if (r != 0) return r;
   4042   if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
   4043 
   4044   return 0;
   4045 }
   4046 
   4047 
   4048 enum CCSTATE {
   4049   CCS_VALUE,
   4050   CCS_RANGE,
   4051   CCS_COMPLETE,
   4052   CCS_START
   4053 };
   4054 
   4055 enum CCVALTYPE {
   4056   CCV_SB,
   4057   CCV_CODE_POINT,
   4058   CCV_CLASS
   4059 };
   4060 
   4061 static int
   4062 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
   4063 		 enum CCSTATE* state, ScanEnv* env)
   4064 {
   4065   int r;
   4066 
   4067   if (*state == CCS_RANGE)
   4068     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
   4069 
   4070   if (*state == CCS_VALUE && *type != CCV_CLASS) {
   4071     if (*type == CCV_SB)
   4072       BITSET_SET_BIT(cc->bs, (int )(*vs));
   4073     else if (*type == CCV_CODE_POINT) {
   4074       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
   4075       if (r < 0) return r;
   4076     }
   4077   }
   4078 
   4079   *state = CCS_VALUE;
   4080   *type  = CCV_CLASS;
   4081   return 0;
   4082 }
   4083 
   4084 static int
   4085 next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
   4086 	       int* vs_israw, int v_israw,
   4087 	       enum CCVALTYPE intype, enum CCVALTYPE* type,
   4088 	       enum CCSTATE* state, ScanEnv* env)
   4089 {
   4090   int r;
   4091 
   4092   switch (*state) {
   4093   case CCS_VALUE:
   4094     if (*type == CCV_SB)
   4095       BITSET_SET_BIT(cc->bs, (int )(*vs));
   4096     else if (*type == CCV_CODE_POINT) {
   4097       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
   4098       if (r < 0) return r;
   4099     }
   4100     break;
   4101 
   4102   case CCS_RANGE:
   4103     if (intype == *type) {
   4104       if (intype == CCV_SB) {
   4105         if (*vs > 0xff || v > 0xff)
   4106           return ONIGERR_INVALID_CODE_POINT_VALUE;
   4107 
   4108 	if (*vs > v) {
   4109 	  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
   4110 	    goto ccs_range_end;
   4111 	  else
   4112 	    return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
   4113 	}
   4114 	bitset_set_range(cc->bs, (int )*vs, (int )v);
   4115       }
   4116       else {
   4117 	r = add_code_range(&(cc->mbuf), env, *vs, v);
   4118 	if (r < 0) return r;
   4119       }
   4120     }
   4121     else {
   4122 #if 0
   4123       if (intype == CCV_CODE_POINT && *type == CCV_SB) {
   4124 #endif
   4125 	if (*vs > v) {
   4126 	  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
   4127 	    goto ccs_range_end;
   4128 	  else
   4129 	    return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
   4130 	}
   4131 	bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
   4132 	r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
   4133 	if (r < 0) return r;
   4134 #if 0
   4135       }
   4136       else
   4137 	return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
   4138 #endif
   4139     }
   4140   ccs_range_end:
   4141     *state = CCS_COMPLETE;
   4142     break;
   4143 
   4144   case CCS_COMPLETE:
   4145   case CCS_START:
   4146     *state = CCS_VALUE;
   4147     break;
   4148 
   4149   default:
   4150     break;
   4151   }
   4152 
   4153   *vs_israw = v_israw;
   4154   *vs       = v;
   4155   *type     = intype;
   4156   return 0;
   4157 }
   4158 
   4159 static int
   4160 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
   4161 		 ScanEnv* env)
   4162 {
   4163   int in_esc;
   4164   OnigCodePoint code;
   4165   OnigEncoding enc = env->enc;
   4166   UChar* p = from;
   4167 
   4168   in_esc = 0;
   4169   while (! PEND) {
   4170     if (ignore_escaped && in_esc) {
   4171       in_esc = 0;
   4172     }
   4173     else {
   4174       PFETCH_S(code);
   4175       if (code == c) return 1;
   4176       if (code == MC_ESC(env->syntax)) in_esc = 1;
   4177     }
   4178   }
   4179   return 0;
   4180 }
   4181 
   4182 static int
   4183 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
   4184 		 ScanEnv* env)
   4185 {
   4186   int r, neg, len, fetched, and_start;
   4187   OnigCodePoint v, vs;
   4188   UChar *p;
   4189   Node* node;
   4190   CClassNode *cc, *prev_cc;
   4191   CClassNode work_cc;
   4192 
   4193   enum CCSTATE state;
   4194   enum CCVALTYPE val_type, in_type;
   4195   int val_israw, in_israw;
   4196 
   4197   prev_cc = (CClassNode* )NULL;
   4198   *np = NULL_NODE;
   4199   r = fetch_token_in_cc(tok, src, end, env);
   4200   if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
   4201     neg = 1;
   4202     r = fetch_token_in_cc(tok, src, end, env);
   4203   }
   4204   else {
   4205     neg = 0;
   4206   }
   4207 
   4208   if (r < 0) return r;
   4209   if (r == TK_CC_CLOSE) {
   4210     if (! code_exist_check((OnigCodePoint )']',
   4211                            *src, env->pattern_end, 1, env))
   4212       return ONIGERR_EMPTY_CHAR_CLASS;
   4213 
   4214     CC_ESC_WARN(env, (UChar* )"]");
   4215     r = tok->type = TK_CHAR;  /* allow []...] */
   4216   }
   4217 
   4218   *np = node = node_new_cclass();
   4219   CHECK_NULL_RETURN_MEMERR(node);
   4220   cc = NCCLASS(node);
   4221 
   4222   and_start = 0;
   4223   state = CCS_START;
   4224   p = *src;
   4225   while (r != TK_CC_CLOSE) {
   4226     fetched = 0;
   4227     switch (r) {
   4228     case TK_CHAR:
   4229       len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
   4230       if (len > 1) {
   4231 	in_type = CCV_CODE_POINT;
   4232       }
   4233       else if (len < 0) {
   4234 	r = len;
   4235 	goto err;
   4236       }
   4237       else {
   4238       sb_char:
   4239 	in_type = CCV_SB;
   4240       }
   4241       v = (OnigCodePoint )tok->u.c;
   4242       in_israw = 0;
   4243       goto val_entry2;
   4244       break;
   4245 
   4246     case TK_RAW_BYTE:
   4247       /* tok->base != 0 : octal or hexadec. */
   4248       if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
   4249 	UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
   4250 	UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
   4251 	UChar* psave = p;
   4252 	int i, base = tok->base;
   4253 
   4254 	buf[0] = (UChar)tok->u.c;
   4255 	for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
   4256 	  r = fetch_token_in_cc(tok, &p, end, env);
   4257 	  if (r < 0) goto err;
   4258 	  if (r != TK_RAW_BYTE || tok->base != base) {
   4259 	    fetched = 1;
   4260 	    break;
   4261 	  }
   4262 	  buf[i] = (UChar)tok->u.c;
   4263 	}
   4264 
   4265 	if (i < ONIGENC_MBC_MINLEN(env->enc)) {
   4266 	  r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
   4267 	  goto err;
   4268 	}
   4269 
   4270 	len = enclen(env->enc, buf);
   4271 	if (i < len) {
   4272 	  r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
   4273 	  goto err;
   4274 	}
   4275 	else if (i > len) { /* fetch back */
   4276 	  p = psave;
   4277 	  for (i = 1; i < len; i++) {
   4278 	    r = fetch_token_in_cc(tok, &p, end, env);
   4279 	  }
   4280 	  fetched = 0;
   4281 	}
   4282 
   4283 	if (i == 1) {
   4284 	  v = (OnigCodePoint )buf[0];
   4285 	  goto raw_single;
   4286 	}
   4287 	else {
   4288 	  v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
   4289 	  in_type = CCV_CODE_POINT;
   4290 	}
   4291       }
   4292       else {
   4293 	v = (OnigCodePoint )tok->u.c;
   4294       raw_single:
   4295 	in_type = CCV_SB;
   4296       }
   4297       in_israw = 1;
   4298       goto val_entry2;
   4299       break;
   4300 
   4301     case TK_CODE_POINT:
   4302       v = tok->u.code;
   4303       in_israw = 1;
   4304     val_entry:
   4305       len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
   4306       if (len < 0) {
   4307 	r = len;
   4308 	goto err;
   4309       }
   4310       in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
   4311     val_entry2:
   4312       r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
   4313 			 &state, env);
   4314       if (r != 0) goto err;
   4315       break;
   4316 
   4317     case TK_POSIX_BRACKET_OPEN:
   4318       r = parse_posix_bracket(cc, &p, end, env);
   4319       if (r < 0) goto err;
   4320       if (r == 1) {  /* is not POSIX bracket */
   4321 	CC_ESC_WARN(env, (UChar* )"[");
   4322 	p = tok->backp;
   4323 	v = (OnigCodePoint )tok->u.c;
   4324 	in_israw = 0;
   4325 	goto val_entry;
   4326       }
   4327       goto next_class;
   4328       break;
   4329 
   4330     case TK_CHAR_TYPE:
   4331       r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
   4332       if (r != 0) return r;
   4333 
   4334     next_class:
   4335       r = next_state_class(cc, &vs, &val_type, &state, env);
   4336       if (r != 0) goto err;
   4337       break;
   4338 
   4339     case TK_CHAR_PROPERTY:
   4340       {
   4341 	int ctype;
   4342 
   4343 	ctype = fetch_char_property_to_ctype(&p, end, env);
   4344 	if (ctype < 0) return ctype;
   4345 	r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
   4346 	if (r != 0) return r;
   4347 	goto next_class;
   4348       }
   4349       break;
   4350 
   4351     case TK_CC_RANGE:
   4352       if (state == CCS_VALUE) {
   4353 	r = fetch_token_in_cc(tok, &p, end, env);
   4354 	if (r < 0) goto err;
   4355 	fetched = 1;
   4356 	if (r == TK_CC_CLOSE) { /* allow [x-] */
   4357 	range_end_val:
   4358 	  v = (OnigCodePoint )'-';
   4359 	  in_israw = 0;
   4360 	  goto val_entry;
   4361 	}
   4362 	else if (r == TK_CC_AND) {
   4363 	  CC_ESC_WARN(env, (UChar* )"-");
   4364 	  goto range_end_val;
   4365 	}
   4366 	state = CCS_RANGE;
   4367       }
   4368       else if (state == CCS_START) {
   4369 	/* [-xa] is allowed */
   4370 	v = (OnigCodePoint )tok->u.c;
   4371 	in_israw = 0;
   4372 
   4373 	r = fetch_token_in_cc(tok, &p, end, env);
   4374 	if (r < 0) goto err;
   4375 	fetched = 1;
   4376 	/* [--x] or [a&&-x] is warned. */
   4377 	if (r == TK_CC_RANGE || and_start != 0)
   4378 	  CC_ESC_WARN(env, (UChar* )"-");
   4379 
   4380 	goto val_entry;
   4381       }
   4382       else if (state == CCS_RANGE) {
   4383 	CC_ESC_WARN(env, (UChar* )"-");
   4384 	goto sb_char;  /* [!--x] is allowed */
   4385       }
   4386       else { /* CCS_COMPLETE */
   4387 	r = fetch_token_in_cc(tok, &p, end, env);
   4388 	if (r < 0) goto err;
   4389 	fetched = 1;
   4390 	if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
   4391 	else if (r == TK_CC_AND) {
   4392 	  CC_ESC_WARN(env, (UChar* )"-");
   4393 	  goto range_end_val;
   4394 	}
   4395 
   4396 	if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
   4397 	  CC_ESC_WARN(env, (UChar* )"-");
   4398 	  goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
   4399 	}
   4400 	r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
   4401 	goto err;
   4402       }
   4403       break;
   4404 
   4405     case TK_CC_CC_OPEN: /* [ */
   4406       {
   4407 	Node *anode;
   4408 	CClassNode* acc;
   4409 
   4410 	r = parse_char_class(&anode, tok, &p, end, env);
   4411 	if (r != 0) goto cc_open_err;
   4412 	acc = NCCLASS(anode);
   4413 	r = or_cclass(cc, acc, env->enc);
   4414 
   4415 	onig_node_free(anode);
   4416       cc_open_err:
   4417 	if (r != 0) goto err;
   4418       }
   4419       break;
   4420 
   4421     case TK_CC_AND: /* && */
   4422       {
   4423 	if (state == CCS_VALUE) {
   4424 	  r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
   4425 			     &val_type, &state, env);
   4426 	  if (r != 0) goto err;
   4427 	}
   4428 	/* initialize local variables */
   4429 	and_start = 1;
   4430 	state = CCS_START;
   4431 
   4432 	if (IS_NOT_NULL(prev_cc)) {
   4433 	  r = and_cclass(prev_cc, cc, env->enc);
   4434 	  if (r != 0) goto err;
   4435 	  bbuf_free(cc->mbuf);
   4436 	}
   4437 	else {
   4438 	  prev_cc = cc;
   4439 	  cc = &work_cc;
   4440 	}
   4441 	initialize_cclass(cc);
   4442       }
   4443       break;
   4444 
   4445     case TK_EOT:
   4446       r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
   4447       goto err;
   4448       break;
   4449     default:
   4450       r = ONIGERR_PARSER_BUG;
   4451       goto err;
   4452       break;
   4453     }
   4454 
   4455     if (fetched)
   4456       r = tok->type;
   4457     else {
   4458       r = fetch_token_in_cc(tok, &p, end, env);
   4459       if (r < 0) goto err;
   4460     }
   4461   }
   4462 
   4463   if (state == CCS_VALUE) {
   4464     r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
   4465 		       &val_type, &state, env);
   4466     if (r != 0) goto err;
   4467   }
   4468 
   4469   if (IS_NOT_NULL(prev_cc)) {
   4470     r = and_cclass(prev_cc, cc, env->enc);
   4471     if (r != 0) goto err;
   4472     bbuf_free(cc->mbuf);
   4473     cc = prev_cc;
   4474   }
   4475 
   4476   if (neg != 0)
   4477     NCCLASS_SET_NOT(cc);
   4478   else
   4479     NCCLASS_CLEAR_NOT(cc);
   4480   if (IS_NCCLASS_NOT(cc) &&
   4481       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
   4482     int is_empty;
   4483 
   4484     is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
   4485     if (is_empty != 0)
   4486       BITSET_IS_EMPTY(cc->bs, is_empty);
   4487 
   4488     if (is_empty == 0) {
   4489 #define NEWLINE_CODE    0x0a
   4490 
   4491       if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
   4492         if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
   4493           BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
   4494         else
   4495           add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
   4496       }
   4497     }
   4498   }
   4499   *src = p;
   4500   return 0;
   4501 
   4502  err:
   4503   if (cc != NCCLASS(*np))
   4504     bbuf_free(cc->mbuf);
   4505   onig_node_free(*np);
   4506   return r;
   4507 }
   4508 
   4509 static int parse_subexp(Node** top, OnigToken* tok, int term,
   4510 			UChar** src, UChar* end, ScanEnv* env);
   4511 
   4512 static int
   4513 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
   4514 	      ScanEnv* env)
   4515 {
   4516   int r, num;
   4517   Node *target;
   4518   OnigOptionType option;
   4519   OnigCodePoint c;
   4520   OnigEncoding enc = env->enc;
   4521 
   4522 #ifdef USE_NAMED_GROUP
   4523   int list_capture;
   4524 #endif
   4525 
   4526   UChar* p = *src;
   4527   PFETCH_READY;
   4528 
   4529   *np = NULL;
   4530   if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
   4531 
   4532   option = env->option;
   4533   if (PPEEK_IS('?') &&
   4534       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
   4535     PINC;
   4536     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
   4537 
   4538     PFETCH(c);
   4539     switch (c) {
   4540     case ':':   /* (?:...) grouping only */
   4541     group:
   4542       r = fetch_token(tok, &p, end, env);
   4543       if (r < 0) return r;
   4544       r = parse_subexp(np, tok, term, &p, end, env);
   4545       if (r < 0) return r;
   4546       *src = p;
   4547       return 1; /* group */
   4548       break;
   4549 
   4550     case '=':
   4551       *np = onig_node_new_anchor(ANCHOR_PREC_READ);
   4552       break;
   4553     case '!':  /*         preceding read */
   4554       *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
   4555       break;
   4556     case '>':            /* (?>...) stop backtrack */
   4557       *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
   4558       break;
   4559 
   4560 #ifdef USE_NAMED_GROUP
   4561     case '\'':
   4562       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
   4563 	goto named_group1;
   4564       }
   4565       else
   4566 	return ONIGERR_UNDEFINED_GROUP_OPTION;
   4567       break;
   4568 #endif
   4569 
   4570     case '<':   /* look behind (?<=...), (?<!...) */
   4571       PFETCH(c);
   4572       if (c == '=')
   4573 	*np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
   4574       else if (c == '!')
   4575 	*np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
   4576 #ifdef USE_NAMED_GROUP
   4577       else {
   4578 	if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
   4579 	  UChar *name;
   4580 	  UChar *name_end;
   4581 
   4582 	  PUNFETCH;
   4583 	  c = '<';
   4584 
   4585 	named_group1:
   4586 	  list_capture = 0;
   4587 
   4588 	named_group2:
   4589 	  name = p;
   4590 	  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
   4591 	  if (r < 0) return r;
   4592 
   4593 	  num = scan_env_add_mem_entry(env);
   4594 	  if (num < 0) return num;
   4595 	  if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
   4596 	    return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
   4597 
   4598 	  r = name_add(env->reg, name, name_end, num, env);
   4599 	  if (r != 0) return r;
   4600 	  *np = node_new_enclose_memory(env->option, 1);
   4601 	  CHECK_NULL_RETURN_MEMERR(*np);
   4602 	  NENCLOSE(*np)->regnum = num;
   4603 	  if (list_capture != 0)
   4604 	    BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
   4605 	  env->num_named++;
   4606 	}
   4607 	else {
   4608 	  return ONIGERR_UNDEFINED_GROUP_OPTION;
   4609 	}
   4610       }
   4611 #else
   4612       else {
   4613 	return ONIGERR_UNDEFINED_GROUP_OPTION;
   4614       }
   4615 #endif
   4616       break;
   4617 
   4618     case '@':
   4619       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
   4620 #ifdef USE_NAMED_GROUP
   4621 	if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
   4622 	  PFETCH(c);
   4623 	  if (c == '<' || c == '\'') {
   4624 	    list_capture = 1;
   4625 	    goto named_group2; /* (?@<name>...) */
   4626 	  }
   4627 	  PUNFETCH;
   4628 	}
   4629 #endif
   4630 	*np = node_new_enclose_memory(env->option, 0);
   4631 	CHECK_NULL_RETURN_MEMERR(*np);
   4632 	num = scan_env_add_mem_entry(env);
   4633 	if (num < 0) {
   4634 	  onig_node_free(*np);
   4635 	  return num;
   4636 	}
   4637 	else if (num >= (int )BIT_STATUS_BITS_NUM) {
   4638 	  onig_node_free(*np);
   4639 	  return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
   4640 	}
   4641 	NENCLOSE(*np)->regnum = num;
   4642 	BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
   4643       }
   4644       else {
   4645 	return ONIGERR_UNDEFINED_GROUP_OPTION;
   4646       }
   4647       break;
   4648 
   4649 #ifdef USE_POSIXLINE_OPTION
   4650     case 'p':
   4651 #endif
   4652     case '-': case 'i': case 'm': case 's': case 'x':
   4653       {
   4654 	int neg = 0;
   4655 
   4656 	while (1) {
   4657 	  switch (c) {
   4658 	  case ':':
   4659 	  case ')':
   4660 	  break;
   4661 
   4662 	  case '-':  neg = 1; break;
   4663 	  case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
   4664 	  case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
   4665 	  case 's':
   4666 	    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
   4667 	      ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
   4668 	    }
   4669 	    else
   4670 	      return ONIGERR_UNDEFINED_GROUP_OPTION;
   4671 	    break;
   4672 
   4673 	  case 'm':
   4674 	    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
   4675 	      ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
   4676 	    }
   4677 	    else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
   4678 	      ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
   4679 	    }
   4680 	    else
   4681 	      return ONIGERR_UNDEFINED_GROUP_OPTION;
   4682 	    break;
   4683 #ifdef USE_POSIXLINE_OPTION
   4684 	  case 'p':
   4685 	    ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
   4686 	    break;
   4687 #endif
   4688 	  default:
   4689 	    return ONIGERR_UNDEFINED_GROUP_OPTION;
   4690 	  }
   4691 
   4692 	  if (c == ')') {
   4693 	    *np = node_new_option(option);
   4694 	    CHECK_NULL_RETURN_MEMERR(*np);
   4695 	    *src = p;
   4696 	    return 2; /* option only */
   4697 	  }
   4698 	  else if (c == ':') {
   4699 	    OnigOptionType prev = env->option;
   4700 
   4701 	    env->option     = option;
   4702 	    r = fetch_token(tok, &p, end, env);
   4703 	    if (r < 0) return r;
   4704 	    r = parse_subexp(&target, tok, term, &p, end, env);
   4705 	    env->option = prev;
   4706 	    if (r < 0) return r;
   4707 	    *np = node_new_option(option);
   4708 	    CHECK_NULL_RETURN_MEMERR(*np);
   4709 	    NENCLOSE(*np)->target = target;
   4710 	    *src = p;
   4711 	    return 0;
   4712 	  }
   4713 
   4714 	  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
   4715 	  PFETCH(c);
   4716 	}
   4717       }
   4718       break;
   4719 
   4720     default:
   4721       return ONIGERR_UNDEFINED_GROUP_OPTION;
   4722     }
   4723   }
   4724   else {
   4725     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
   4726       goto group;
   4727 
   4728     *np = node_new_enclose_memory(env->option, 0);
   4729     CHECK_NULL_RETURN_MEMERR(*np);
   4730     num = scan_env_add_mem_entry(env);
   4731     if (num < 0) return num;
   4732     NENCLOSE(*np)->regnum = num;
   4733   }
   4734 
   4735   CHECK_NULL_RETURN_MEMERR(*np);
   4736   r = fetch_token(tok, &p, end, env);
   4737   if (r < 0) return r;
   4738   r = parse_subexp(&target, tok, term, &p, end, env);
   4739   if (r < 0) return r;
   4740 
   4741   if (NTYPE(*np) == NT_ANCHOR)
   4742     NANCHOR(*np)->target = target;
   4743   else {
   4744     NENCLOSE(*np)->target = target;
   4745     if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
   4746       /* Don't move this to previous of parse_subexp() */
   4747       r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
   4748       if (r != 0) return r;
   4749     }
   4750   }
   4751 
   4752   *src = p;
   4753   return 0;
   4754 }
   4755 
   4756 static const char* PopularQStr[] = {
   4757   "?", "*", "+", "??", "*?", "+?"
   4758 };
   4759 
   4760 static const char* ReduceQStr[] = {
   4761   "", "", "*", "*?", "??", "+ and ??", "+? and ?"
   4762 };
   4763 
   4764 static int
   4765 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
   4766 {
   4767   QtfrNode* qn;
   4768 
   4769   qn = NQTFR(qnode);
   4770   if (qn->lower == 1 && qn->upper == 1) {
   4771     return 1;
   4772   }
   4773 
   4774   switch (NTYPE(target)) {
   4775   case NT_STR:
   4776     if (! group) {
   4777       StrNode* sn = NSTR(target);
   4778       if (str_node_can_be_split(sn, env->enc)) {
   4779 	Node* n = str_node_split_last_char(sn, env->enc);
   4780 	if (IS_NOT_NULL(n)) {
   4781 	  qn->target = n;
   4782 	  return 2;
   4783 	}
   4784       }
   4785     }
   4786     break;
   4787 
   4788   case NT_QTFR:
   4789     { /* check redundant double repeat. */
   4790       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
   4791       QtfrNode* qnt   = NQTFR(target);
   4792       int nestq_num   = popular_quantifier_num(qn);
   4793       int targetq_num = popular_quantifier_num(qnt);
   4794       if (nestq_num < 0 || targetq_num < 0) {
   4795         return ONIGERR_TYPE_BUG;
   4796       }
   4797 
   4798 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
   4799       if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
   4800 	  IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
   4801         UChar buf[WARN_BUFSIZE];
   4802 
   4803         switch(ReduceTypeTable[targetq_num][nestq_num]) {
   4804         case RQ_ASIS:
   4805           break;
   4806 
   4807         case RQ_DEL:
   4808           if (onig_verb_warn != onig_null_warn) {
   4809             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
   4810                                  env->pattern, env->pattern_end,
   4811                                  (UChar* )"redundant nested repeat operator");
   4812             (*onig_verb_warn)((char* )buf);
   4813           }
   4814           goto warn_exit;
   4815           break;
   4816 
   4817         default:
   4818           if (onig_verb_warn != onig_null_warn) {
   4819             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
   4820                                        env->pattern, env->pattern_end,
   4821             (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
   4822             PopularQStr[targetq_num], PopularQStr[nestq_num],
   4823             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
   4824             (*onig_verb_warn)((char* )buf);
   4825           }
   4826           goto warn_exit;
   4827           break;
   4828         }
   4829       }
   4830 
   4831     warn_exit:
   4832 #endif
   4833       if (targetq_num >= 0) {
   4834 	if (nestq_num >= 0) {
   4835 	  onig_reduce_nested_quantifier(qnode, target);
   4836 	  goto q_exit;
   4837 	}
   4838 	else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
   4839 	  /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
   4840 	  if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
   4841 	    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
   4842 	  }
   4843 	}
   4844       }
   4845     }
   4846     break;
   4847 
   4848   default:
   4849     break;
   4850   }
   4851 
   4852   qn->target = target;
   4853  q_exit:
   4854   return 0;
   4855 }
   4856 
   4857 
   4858 #ifdef USE_SHARED_CCLASS_TABLE
   4859 
   4860 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
   4861 
   4862 /* for ctype node hash table */
   4863 
   4864 typedef struct {
   4865   OnigEncoding enc;
   4866   int not;
   4867   int type;
   4868 } type_cclass_key;
   4869 
   4870 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
   4871 {
   4872   if (x->type != y->type) return 1;
   4873   if (x->enc  != y->enc)  return 1;
   4874   if (x->not  != y->not)  return 1;
   4875   return 0;
   4876 }
   4877 
   4878 static int type_cclass_hash(type_cclass_key* key)
   4879 {
   4880   int i, val;
   4881   UChar *p;
   4882 
   4883   val = 0;
   4884 
   4885   p = (UChar* )&(key->enc);
   4886   for (i = 0; i < (int )sizeof(key->enc); i++) {
   4887     val = val * 997 + (int )*p++;
   4888   }
   4889 
   4890   p = (UChar* )(&key->type);
   4891   for (i = 0; i < (int )sizeof(key->type); i++) {
   4892     val = val * 997 + (int )*p++;
   4893   }
   4894 
   4895   val += key->not;
   4896   return val + (val >> 5);
   4897 }
   4898 
   4899 static struct st_hash_type type_type_cclass_hash = {
   4900     type_cclass_cmp,
   4901     type_cclass_hash,
   4902 };
   4903 
   4904 static st_table* OnigTypeCClassTable;
   4905 
   4906 
   4907 static int
   4908 i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
   4909 {
   4910   if (IS_NOT_NULL(node)) {
   4911     CClassNode* cc = NCCLASS(node);
   4912     if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
   4913     xfree(node);
   4914   }
   4915 
   4916   if (IS_NOT_NULL(key)) xfree(key);
   4917   return ST_DELETE;
   4918 }
   4919 
   4920 extern int
   4921 onig_free_shared_cclass_table(void)
   4922 {
   4923   if (IS_NOT_NULL(OnigTypeCClassTable)) {
   4924     onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
   4925     onig_st_free_table(OnigTypeCClassTable);
   4926     OnigTypeCClassTable = NULL;
   4927   }
   4928 
   4929   return 0;
   4930 }
   4931 
   4932 #endif /* USE_SHARED_CCLASS_TABLE */
   4933 
   4934 
   4935 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
   4936 static int
   4937 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
   4938 {
   4939   BBuf *tbuf;
   4940   int r;
   4941 
   4942   if (IS_NCCLASS_NOT(cc)) {
   4943     bitset_invert(cc->bs);
   4944 
   4945     if (! ONIGENC_IS_SINGLEBYTE(enc)) {
   4946       r = not_code_range_buf(enc, cc->mbuf, &tbuf);
   4947       if (r != 0) return r;
   4948 
   4949       bbuf_free(cc->mbuf);
   4950       cc->mbuf = tbuf;
   4951     }
   4952 
   4953     NCCLASS_CLEAR_NOT(cc);
   4954   }
   4955 
   4956   return 0;
   4957 }
   4958 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
   4959 
   4960 typedef struct {
   4961   ScanEnv*    env;
   4962   CClassNode* cc;
   4963   Node*       alt_root;
   4964   Node**      ptail;
   4965 } IApplyCaseFoldArg;
   4966 
   4967 static int
   4968 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
   4969 		  int to_len, void* arg)
   4970 {
   4971   IApplyCaseFoldArg* iarg;
   4972   ScanEnv* env;
   4973   CClassNode* cc;
   4974   BitSetRef bs;
   4975 
   4976   iarg = (IApplyCaseFoldArg* )arg;
   4977   env = iarg->env;
   4978   cc  = iarg->cc;
   4979   bs = cc->bs;
   4980 
   4981   if (to_len == 1) {
   4982     int is_in = onig_is_code_in_cc(env->enc, from, cc);
   4983 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
   4984     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
   4985 	(is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
   4986       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
   4987 	add_code_range(&(cc->mbuf), env, *to, *to);
   4988       }
   4989       else {
   4990 	BITSET_SET_BIT(bs, *to);
   4991       }
   4992     }
   4993 #else
   4994     if (is_in != 0) {
   4995       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
   4996 	if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
   4997 	add_code_range(&(cc->mbuf), env, *to, *to);
   4998       }
   4999       else {
   5000 	if (IS_NCCLASS_NOT(cc)) {
   5001 	  BITSET_CLEAR_BIT(bs, *to);
   5002 	}
   5003 	else
   5004 	  BITSET_SET_BIT(bs, *to);
   5005       }
   5006     }
   5007 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
   5008   }
   5009   else {
   5010     int r, i, len;
   5011     UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
   5012     Node *snode = NULL_NODE;
   5013 
   5014     if (onig_is_code_in_cc(env->enc, from, cc)
   5015 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
   5016 	&& !IS_NCCLASS_NOT(cc)
   5017 #endif
   5018 	) {
   5019       for (i = 0; i < to_len; i++) {
   5020 	len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
   5021 	if (i == 0) {
   5022 	  snode = onig_node_new_str(buf, buf + len);
   5023 	  CHECK_NULL_RETURN_MEMERR(snode);
   5024 
   5025 	  /* char-class expanded multi-char only
   5026 	     compare with string folded at match time. */
   5027 	  NSTRING_SET_AMBIG(snode);
   5028 	}
   5029 	else {
   5030 	  r = onig_node_str_cat(snode, buf, buf + len);
   5031 	  if (r < 0) {
   5032 	    onig_node_free(snode);
   5033 	    return r;
   5034 	  }
   5035 	}
   5036       }
   5037 
   5038       *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
   5039       CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
   5040       iarg->ptail = &(NCDR((*(iarg->ptail))));
   5041     }
   5042   }
   5043 
   5044   return 0;
   5045 }
   5046 
   5047 static int
   5048 parse_exp(Node** np, OnigToken* tok, int term,
   5049 	  UChar** src, UChar* end, ScanEnv* env)
   5050 {
   5051   int r, len, group = 0;
   5052   Node* qn;
   5053   Node** targetp;
   5054 
   5055   *np = NULL;
   5056   if (tok->type == (enum TokenSyms )term)
   5057     goto end_of_token;
   5058 
   5059   switch (tok->type) {
   5060   case TK_ALT:
   5061   case TK_EOT:
   5062   end_of_token:
   5063   *np = node_new_empty();
   5064   return tok->type;
   5065   break;
   5066 
   5067   case TK_SUBEXP_OPEN:
   5068     r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
   5069     if (r < 0) return r;
   5070     if (r == 1) group = 1;
   5071     else if (r == 2) { /* option only */
   5072       Node* target;
   5073       OnigOptionType prev = env->option;
   5074 
   5075       env->option = NENCLOSE(*np)->option;
   5076       r = fetch_token(tok, src, end, env);
   5077       if (r < 0) return r;
   5078       r = parse_subexp(&target, tok, term, src, end, env);
   5079       env->option = prev;
   5080       if (r < 0) return r;
   5081       NENCLOSE(*np)->target = target;
   5082       return tok->type;
   5083     }
   5084     break;
   5085 
   5086   case TK_SUBEXP_CLOSE:
   5087     if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
   5088       return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
   5089 
   5090     if (tok->escaped) goto tk_raw_byte;
   5091     else goto tk_byte;
   5092     break;
   5093 
   5094   case TK_STRING:
   5095   tk_byte:
   5096     {
   5097       *np = node_new_str(tok->backp, *src);
   5098       CHECK_NULL_RETURN_MEMERR(*np);
   5099 
   5100       while (1) {
   5101 	r = fetch_token(tok, src, end, env);
   5102 	if (r < 0) return r;
   5103 	if (r != TK_STRING) break;
   5104 
   5105 	r = onig_node_str_cat(*np, tok->backp, *src);
   5106 	if (r < 0) return r;
   5107       }
   5108 
   5109     string_end:
   5110       targetp = np;
   5111       goto repeat;
   5112     }
   5113     break;
   5114 
   5115   case TK_RAW_BYTE:
   5116   tk_raw_byte:
   5117     {
   5118       *np = node_new_str_raw_char((UChar )tok->u.c);
   5119       CHECK_NULL_RETURN_MEMERR(*np);
   5120       len = 1;
   5121       while (1) {
   5122 	if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
   5123 	  if (len == enclen(env->enc, NSTR(*np)->s)) {
   5124 	    r = fetch_token(tok, src, end, env);
   5125 	    NSTRING_CLEAR_RAW(*np);
   5126 	    goto string_end;
   5127 	  }
   5128 	}
   5129 
   5130 	r = fetch_token(tok, src, end, env);
   5131 	if (r < 0) return r;
   5132 	if (r != TK_RAW_BYTE) {
   5133 	  /* Don't use this, it is wrong for little endian encodings. */
   5134 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
   5135 	  int rem;
   5136 	  if (len < ONIGENC_MBC_MINLEN(env->enc)) {
   5137 	    rem = ONIGENC_MBC_MINLEN(env->enc) - len;
   5138 	    (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
   5139 	    if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
   5140 	      NSTRING_CLEAR_RAW(*np);
   5141 	      goto string_end;
   5142 	    }
   5143 	  }
   5144 #endif
   5145 	  return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
   5146 	}
   5147 
   5148 	r = node_str_cat_char(*np, (UChar )tok->u.c);
   5149 	if (r < 0) return r;
   5150 
   5151 	len++;
   5152       }
   5153     }
   5154     break;
   5155 
   5156   case TK_CODE_POINT:
   5157     {
   5158       UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
   5159       int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
   5160       if (num < 0) return num;
   5161 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
   5162       *np = node_new_str_raw(buf, buf + num);
   5163 #else
   5164       *np = node_new_str(buf, buf + num);
   5165 #endif
   5166       CHECK_NULL_RETURN_MEMERR(*np);
   5167     }
   5168     break;
   5169 
   5170   case TK_QUOTE_OPEN:
   5171     {
   5172       OnigCodePoint end_op[2];
   5173       UChar *qstart, *qend, *nextp;
   5174 
   5175       end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
   5176       end_op[1] = (OnigCodePoint )'E';
   5177       qstart = *src;
   5178       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
   5179       if (IS_NULL(qend)) {
   5180 	nextp = qend = end;
   5181       }
   5182       *np = node_new_str(qstart, qend);
   5183       CHECK_NULL_RETURN_MEMERR(*np);
   5184       *src = nextp;
   5185     }
   5186     break;
   5187 
   5188   case TK_CHAR_TYPE:
   5189     {
   5190       switch (tok->u.prop.ctype) {
   5191       case ONIGENC_CTYPE_WORD:
   5192 	*np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
   5193 	CHECK_NULL_RETURN_MEMERR(*np);
   5194 	break;
   5195 
   5196       case ONIGENC_CTYPE_SPACE:
   5197       case ONIGENC_CTYPE_DIGIT:
   5198       case ONIGENC_CTYPE_XDIGIT:
   5199 	{
   5200 	  CClassNode* cc;
   5201 
   5202 #ifdef USE_SHARED_CCLASS_TABLE
   5203           const OnigCodePoint *mbr;
   5204 	  OnigCodePoint sb_out;
   5205 
   5206           r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
   5207 					   &sb_out, &mbr);
   5208           if (r == 0 &&
   5209               ONIGENC_CODE_RANGE_NUM(mbr)
   5210               >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
   5211             type_cclass_key  key;
   5212             type_cclass_key* new_key;
   5213 
   5214             key.enc  = env->enc;
   5215             key.not  = tok->u.prop.not;
   5216             key.type = tok->u.prop.ctype;
   5217 
   5218             THREAD_ATOMIC_START;
   5219 
   5220             if (IS_NULL(OnigTypeCClassTable)) {
   5221               OnigTypeCClassTable
   5222                 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
   5223               if (IS_NULL(OnigTypeCClassTable)) {
   5224                 THREAD_ATOMIC_END;
   5225                 return ONIGERR_MEMORY;
   5226               }
   5227             }
   5228             else {
   5229               if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )(UINTN)&key,
   5230                                  (st_data_t* )np)) {
   5231                 THREAD_ATOMIC_END;
   5232                 break;
   5233               }
   5234             }
   5235 
   5236             *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
   5237 						     sb_out, mbr);
   5238             if (IS_NULL(*np)) {
   5239               THREAD_ATOMIC_END;
   5240               return ONIGERR_MEMORY;
   5241             }
   5242 
   5243             cc = NCCLASS(*np);
   5244             NCCLASS_SET_SHARE(cc);
   5245             new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
   5246             CHECK_NULL_RETURN_MEMERR(new_key);
   5247 	    xmemcpy(new_key, &key, sizeof(type_cclass_key));
   5248             onig_st_add_direct(OnigTypeCClassTable, (st_data_t )(UINTN)new_key,
   5249                                (st_data_t )(UINTN)*np);
   5250 
   5251             THREAD_ATOMIC_END;
   5252           }
   5253           else {
   5254 #endif
   5255             *np = node_new_cclass();
   5256             CHECK_NULL_RETURN_MEMERR(*np);
   5257             cc = NCCLASS(*np);
   5258             add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
   5259             if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
   5260 #ifdef USE_SHARED_CCLASS_TABLE
   5261           }
   5262 #endif
   5263 	}
   5264 	break;
   5265 
   5266       default:
   5267 	return ONIGERR_PARSER_BUG;
   5268 	break;
   5269       }
   5270     }
   5271     break;
   5272 
   5273   case TK_CHAR_PROPERTY:
   5274     r = parse_char_property(np, tok, src, end, env);
   5275     if (r != 0) return r;
   5276     break;
   5277 
   5278   case TK_CC_OPEN:
   5279     {
   5280       CClassNode* cc;
   5281 
   5282       r = parse_char_class(np, tok, src, end, env);
   5283       if (r != 0) return r;
   5284 
   5285       cc = NCCLASS(*np);
   5286       if (IS_IGNORECASE(env->option)) {
   5287 	IApplyCaseFoldArg iarg;
   5288 
   5289 	iarg.env      = env;
   5290 	iarg.cc       = cc;
   5291 	iarg.alt_root = NULL_NODE;
   5292 	iarg.ptail    = &(iarg.alt_root);
   5293 
   5294 	r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
   5295 					i_apply_case_fold, &iarg);
   5296 	if (r != 0) {
   5297 	  onig_node_free(iarg.alt_root);
   5298 	  return r;
   5299 	}
   5300 	if (IS_NOT_NULL(iarg.alt_root)) {
   5301           Node* work = onig_node_new_alt(*np, iarg.alt_root);
   5302           if (IS_NULL(work)) {
   5303             onig_node_free(iarg.alt_root);
   5304             return ONIGERR_MEMORY;
   5305           }
   5306           *np = work;
   5307 	}
   5308       }
   5309     }
   5310     break;
   5311 
   5312   case TK_ANYCHAR:
   5313     *np = node_new_anychar();
   5314     CHECK_NULL_RETURN_MEMERR(*np);
   5315     break;
   5316 
   5317   case TK_ANYCHAR_ANYTIME:
   5318     *np = node_new_anychar();
   5319     CHECK_NULL_RETURN_MEMERR(*np);
   5320     qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
   5321     CHECK_NULL_RETURN_MEMERR(qn);
   5322     NQTFR(qn)->target = *np;
   5323     *np = qn;
   5324     break;
   5325 
   5326   case TK_BACKREF:
   5327     len = tok->u.backref.num;
   5328     *np = node_new_backref(len,
   5329 		   (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
   5330 			   tok->u.backref.by_name,
   5331 #ifdef USE_BACKREF_WITH_LEVEL
   5332 			   tok->u.backref.exist_level,
   5333 			   tok->u.backref.level,
   5334 #endif
   5335 			   env);
   5336     CHECK_NULL_RETURN_MEMERR(*np);
   5337     break;
   5338 
   5339 #ifdef USE_SUBEXP_CALL
   5340   case TK_CALL:
   5341     {
   5342       int gnum = tok->u.call.gnum;
   5343 
   5344       if (gnum < 0) {
   5345 	gnum = BACKREF_REL_TO_ABS(gnum, env);
   5346 	if (gnum <= 0)
   5347 	  return ONIGERR_INVALID_BACKREF;
   5348       }
   5349       *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
   5350       CHECK_NULL_RETURN_MEMERR(*np);
   5351       env->num_call++;
   5352     }
   5353     break;
   5354 #endif
   5355 
   5356   case TK_ANCHOR:
   5357     *np = onig_node_new_anchor(tok->u.anchor);
   5358     CHECK_NULL_RETURN_MEMERR(*np);
   5359     break;
   5360 
   5361   case TK_OP_REPEAT:
   5362   case TK_INTERVAL:
   5363     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
   5364       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
   5365 	return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
   5366       else
   5367 	*np = node_new_empty();
   5368   CHECK_NULL_RETURN_MEMERR(*np);
   5369     }
   5370     else {
   5371       goto tk_byte;
   5372     }
   5373     break;
   5374 
   5375   default:
   5376     return ONIGERR_PARSER_BUG;
   5377     break;
   5378   }
   5379 
   5380   {
   5381     targetp = np;
   5382 
   5383   re_entry:
   5384     r = fetch_token(tok, src, end, env);
   5385     if (r < 0) return r;
   5386 
   5387   repeat:
   5388     if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
   5389       if (is_invalid_quantifier_target(*targetp))
   5390 	return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
   5391 
   5392       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
   5393 			       (r == TK_INTERVAL ? 1 : 0));
   5394       CHECK_NULL_RETURN_MEMERR(qn);
   5395       NQTFR(qn)->greedy = tok->u.repeat.greedy;
   5396       r = set_quantifier(qn, *targetp, group, env);
   5397       if (r < 0) {
   5398 	onig_node_free(qn);
   5399 	return r;
   5400       }
   5401 
   5402       if (tok->u.repeat.possessive != 0) {
   5403 	Node* en;
   5404 	en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
   5405 	if (IS_NULL(en)) {
   5406 	  onig_node_free(qn);
   5407 	  return ONIGERR_MEMORY;
   5408 	}
   5409 	NENCLOSE(en)->target = qn;
   5410 	qn = en;
   5411       }
   5412 
   5413       if (r == 0) {
   5414 	*targetp = qn;
   5415       }
   5416       else if (r == 1) {
   5417 	onig_node_free(qn);
   5418       }
   5419       else if (r == 2) { /* split case: /abc+/ */
   5420 	Node *tmp;
   5421 
   5422 	*targetp = node_new_list(*targetp, NULL);
   5423 	if (IS_NULL(*targetp)) {
   5424 	  onig_node_free(qn);
   5425 	  return ONIGERR_MEMORY;
   5426 	}
   5427 	tmp = NCDR(*targetp) = node_new_list(qn, NULL);
   5428 	if (IS_NULL(tmp)) {
   5429 	  onig_node_free(qn);
   5430 	  return ONIGERR_MEMORY;
   5431 	}
   5432 	targetp = &(NCAR(tmp));
   5433       }
   5434       goto re_entry;
   5435     }
   5436   }
   5437 
   5438   return r;
   5439 }
   5440 
   5441 static int
   5442 parse_branch(Node** top, OnigToken* tok, int term,
   5443 	     UChar** src, UChar* end, ScanEnv* env)
   5444 {
   5445   int r;
   5446   Node *node, **headp;
   5447 
   5448   *top = NULL;
   5449   r = parse_exp(&node, tok, term, src, end, env);
   5450   if (r < 0) return r;
   5451 
   5452   if (r == TK_EOT || r == term || r == TK_ALT) {
   5453     *top = node;
   5454   }
   5455   else {
   5456     *top  = node_new_list(node, NULL);
   5457     CHECK_NULL_RETURN_MEMERR(*top);
   5458     headp = &(NCDR(*top));
   5459     while (r != TK_EOT && r != term && r != TK_ALT) {
   5460       r = parse_exp(&node, tok, term, src, end, env);
   5461       CHECK_NULL_RETURN_MEMERR(node);
   5462       if (r < 0) return r;
   5463 
   5464       if (NTYPE(node) == NT_LIST) {
   5465 	*headp = node;
   5466 	while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
   5467 	headp = &(NCDR(node));
   5468       }
   5469       else {
   5470 	*headp = node_new_list(node, NULL);
   5471 	headp = &(NCDR(*headp));
   5472       }
   5473     }
   5474   }
   5475 
   5476   return r;
   5477 }
   5478 
   5479 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
   5480 static int
   5481 parse_subexp(Node** top, OnigToken* tok, int term,
   5482 	     UChar** src, UChar* end, ScanEnv* env)
   5483 {
   5484   int r;
   5485   Node *node, **headp;
   5486 
   5487   *top = NULL;
   5488   r = parse_branch(&node, tok, term, src, end, env);
   5489   if (r < 0) {
   5490     onig_node_free(node);
   5491     return r;
   5492   }
   5493 
   5494   if (r == term) {
   5495     *top = node;
   5496   }
   5497   else if (r == TK_ALT) {
   5498     *top  = onig_node_new_alt(node, NULL);
   5499     CHECK_NULL_RETURN_MEMERR(*top);
   5500     headp = &(NCDR(*top));
   5501     while (r == TK_ALT) {
   5502       r = fetch_token(tok, src, end, env);
   5503       if (r < 0) return r;
   5504       r = parse_branch(&node, tok, term, src, end, env);
   5505       if (r < 0) return r;
   5506 
   5507       *headp = onig_node_new_alt(node, NULL);
   5508       headp = &(NCDR(*headp));
   5509     }
   5510 
   5511     if (tok->type != (enum TokenSyms )term)
   5512       goto err;
   5513   }
   5514   else {
   5515   err:
   5516     if (term == TK_SUBEXP_CLOSE)
   5517       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
   5518     else
   5519       return ONIGERR_PARSER_BUG;
   5520   }
   5521 
   5522   return r;
   5523 }
   5524 
   5525 static int
   5526 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
   5527 {
   5528   int r;
   5529   OnigToken tok;
   5530 
   5531   r = fetch_token(&tok, src, end, env);
   5532   if (r < 0) return r;
   5533   r = parse_subexp(top, &tok, TK_EOT, src, end, env);
   5534   if (r < 0) return r;
   5535   return 0;
   5536 }
   5537 
   5538 extern int
   5539 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
   5540 		     regex_t* reg, ScanEnv* env)
   5541 {
   5542   int r;
   5543   UChar* p;
   5544 
   5545 #ifdef USE_NAMED_GROUP
   5546   names_clear(reg);
   5547 #endif
   5548 
   5549   scan_env_clear(env);
   5550   env->option         = reg->options;
   5551   env->case_fold_flag = reg->case_fold_flag;
   5552   env->enc            = reg->enc;
   5553   env->syntax         = reg->syntax;
   5554   env->pattern        = (UChar* )pattern;
   5555   env->pattern_end    = (UChar* )end;
   5556   env->reg            = reg;
   5557 
   5558   *root = NULL;
   5559   p = (UChar* )pattern;
   5560   r = parse_regexp(root, &p, (UChar* )end, env);
   5561   reg->num_mem = env->num_mem;
   5562   return r;
   5563 }
   5564 
   5565 extern void
   5566 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
   5567 				UChar* arg, UChar* arg_end)
   5568 {
   5569   env->error     = arg;
   5570   env->error_end = arg_end;
   5571 }
   5572