Home | History | Annotate | Download | only in Oniguruma
      1 /**********************************************************************
      2   regenc.c -  Oniguruma (regular expression library)
      3 **********************************************************************/
      4 /*-
      5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
      6  * All rights reserved.
      7  *
      8  * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include "regint.h"
     33 
     34 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; /* default encoding; returned by onigenc_get_default_encoding() and overwritten by onigenc_set_default_encoding() */
     35 
     36 extern int
     37 onigenc_init(void)
     38 {
     39   return 0;
     40 }
     41 
     42 extern OnigEncoding
     43 onigenc_get_default_encoding(void)
     44 {
     45   return OnigEncDefaultCharEncoding;
     46 }
     47 
     48 extern int
     49 onigenc_set_default_encoding(OnigEncoding enc)
     50 {
     51   OnigEncDefaultCharEncoding = enc;
     52   return 0;
     53 }
     54 
     55 extern UChar*
     56 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
     57 {
     58   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
     59   if (p < s) {
     60     p += enclen(enc, p);
     61   }
     62   return p;
     63 }
     64 
     65 extern UChar*
     66 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
     67 				   const UChar* start, const UChar* s, const UChar** prev)
     68 {
     69   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
     70 
     71   if (p < s) {
     72     if (prev) *prev = (const UChar* )p;
     73     p += enclen(enc, p);
     74   }
     75   else {
     76     if (prev) *prev = (const UChar* )NULL; /* Sorry */
     77   }
     78   return p;
     79 }
     80 
     81 extern UChar*
     82 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
     83 {
     84   if (s <= start)
     85     return (UChar* )NULL;
     86 
     87   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
     88 }
     89 
     90 extern UChar*
     91 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
     92 {
     93   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
     94     if (s <= start)
     95       return (UChar* )NULL;
     96 
     97     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
     98   }
     99   return (UChar* )s;
    100 }
    101 
    102 extern UChar*
    103 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
    104 {
    105   UChar* q = (UChar* )p;
    106   while (n-- > 0) {
    107     q += ONIGENC_MBC_ENC_LEN(enc, q);
    108   }
    109   return (q <= end ? q : NULL);
    110 }
    111 
    112 extern int
    113 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
    114 {
    115   int n = 0;
    116   UChar* q = (UChar* )p;
    117 
    118   while (q < end) {
    119     q += ONIGENC_MBC_ENC_LEN(enc, q);
    120     n++;
    121   }
    122   return n;
    123 }
    124 
    125 extern int
    126 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
    127 {
    128   int n = 0;
    129   UChar* p = (UChar* )s;
    130 
    131   while (1) {
    132     if (*p == '\0') {
    133       UChar* q;
    134       int len = ONIGENC_MBC_MINLEN(enc);
    135 
    136       if (len == 1) return n;
    137       q = p + 1;
    138       while (len > 1) {
    139         if (*q != '\0') break;
    140         q++;
    141         len--;
    142       }
    143       if (len == 1) return n;
    144     }
    145     p += ONIGENC_MBC_ENC_LEN(enc, p);
    146     n++;
    147   }
    148 }
    149 
    150 extern int
    151 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
    152 {
    153   UChar* start = (UChar* )s;
    154   UChar* p = (UChar* )s;
    155 
    156   while (1) {
    157     if (*p == '\0') {
    158       UChar* q;
    159       int len = ONIGENC_MBC_MINLEN(enc);
    160 
    161       if (len == 1) return (int )(p - start);
    162       q = p + 1;
    163       while (len > 1) {
    164         if (*q != '\0') break;
    165         q++;
    166         len--;
    167       }
    168       if (len == 1) return (int )(p - start);
    169     }
    170     p += ONIGENC_MBC_ENC_LEN(enc, p);
    171   }
    172 }
    173 
    174 const UChar OnigEncAsciiToLowerCaseTable[] = {
          /* Byte-to-byte map written as octal literals, 8 entries per row,
           * indexed by byte value.  Every entry equals its index except
           * 0101-0132 ('A'-'Z'), which hold 0141-0172 ('a'-'z').  The
           * initializer supplies 256 entries although no size is written. */
    175   0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
    176   0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    177   0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
    178   0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
    179   0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    180   0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    181   0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    182   0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    183   0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    184   0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
    185   0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
    186   0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
    187   0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    188   0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
    189   0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
    190   0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
    191   0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
    192   0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
    193   0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
    194   0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
    195   0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
    196   0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    197   0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
    198   0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
    199   0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
    200   0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
    201   0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
    202   0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    203   0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
    204   0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
    205   0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
    206   0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
    207 };
    208 
    209 #ifdef USE_UPPER_CASE_TABLE
    210 const UChar OnigEncAsciiToUpperCaseTable[256] = {
          /* Only compiled when USE_UPPER_CASE_TABLE is defined.
           * Byte-to-byte map written as octal literals, indexed by byte
           * value.  Every entry equals its index except 0141-0172
           * ('a'-'z'), which hold 0101-0132 ('A'-'Z'). */
    211   0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
    212   0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    213   0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
    214   0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
    215   0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    216   0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    217   0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    218   0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    219   0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    220   0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
    221   0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
    222   0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
    223   0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    224   0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
    225   0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
    226   0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
    227   0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
    228   0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
    229   0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
    230   0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
    231   0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
    232   0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    233   0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
    234   0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
    235   0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
    236   0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
    237   0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
    238   0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    239   0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
    240   0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
    241   0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
    242   0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
    243 };
    244 #endif
    245 
    246 const unsigned short OnigEncAsciiCtypeTable[256] = {
          /* One 16-bit mask per byte value, 8 entries per row, indexed by
           * byte value.  The first 128 entries (0x00-0x7f) carry non-zero
           * masks; the last 128 entries (0x80-0xff) are all 0x0000.
           * NOTE(review): each bit presumably stands for one character
           * class (ONIGENC_CTYPE_*) as tested by the ASCII ctype macros --
           * the bit assignment is defined outside this file; confirm in
           * the header before editing any value. */
    247   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
    248   0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
    249   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
    250   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
    251   0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
    252   0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
    253   0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
    254   0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
    255   0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
    256   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
    257   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
    258   0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
    259   0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
    260   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
    261   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
    262   0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
    263   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    264   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    265   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    266   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    267   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    268   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    269   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    270   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    271   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    272   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    273   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    274   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    275   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    276   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    277   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    278   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
    279 };
    280 
    281 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
          /* Byte-to-byte map written as octal literals, indexed by byte
           * value.  Entries equal their index except:
           *   0101-0132 ('A'-'Z') -> 0141-0172 ('a'-'z')
           *   0300-0326           -> 0340-0366
           *   0330-0336           -> 0370-0376
           * 0327 and 0337 sit inside those ranges but are left unchanged. */
    282   0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
    283   0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    284   0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
    285   0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
    286   0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    287   0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    288   0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    289   0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    290   0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    291   0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
    292   0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
    293   0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
    294   0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
    295   0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
    296   0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
    297   0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
    298   0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
    299   0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
    300   0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
    301   0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
    302   0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
    303   0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    304   0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
    305   0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
    306   0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
    307   0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
    308   0360, 0361, 0362, 0363, 0364, 0365, 0366, 0327,
    309   0370, 0371, 0372, 0373, 0374, 0375, 0376, 0337,
    310   0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
    311   0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
    312   0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
    313   0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
    314 };
    315 
    316 #ifdef USE_UPPER_CASE_TABLE
    317 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
          /* Only compiled when USE_UPPER_CASE_TABLE is defined.
           * Byte-to-byte map written as octal literals, indexed by byte
           * value.  Entries equal their index except:
           *   0141-0172 ('a'-'z') -> 0101-0132 ('A'-'Z')
           *   0340-0366           -> 0300-0326
           *   0370-0376           -> 0330-0336
           * 0367 and 0377 sit inside those ranges but are left unchanged. */
    318   0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
    319   0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
    320   0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
    321   0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
    322   0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
    323   0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
    324   0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
    325   0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
    326   0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    327   0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
    328   0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
    329   0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
    330   0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    331   0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
    332   0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
    333   0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
    334   0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
    335   0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
    336   0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
    337   0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
    338   0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
    339   0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    340   0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
    341   0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
    342   0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
    343   0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
    344   0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
    345   0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    346   0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
    347   0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
    348   0320, 0321, 0322, 0323, 0324, 0325, 0326, 0367,
    349   0330, 0331, 0332, 0333, 0334, 0335, 0336, 0377,
    350 };
    351 #endif
    352 
    353 extern void
    354 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
    355 {
    356   /* nothing */
    357   /* obsoleted. */
    358 }
    359 
    360 extern UChar*
    361 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
    362 {
    363   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
    364 }
    365 
    366 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
          /* 26 pairs: first member 0x41-0x5a ('A'-'Z'), second member the
           * matching 0x61-0x7a ('a'-'z').  Read as .from / .to by
           * onigenc_ascii_apply_all_case_fold(), which derives the entry
           * count from sizeof, so the array must stay a complete
           * definition here. */
    367   { 0x41, 0x61 },
    368   { 0x42, 0x62 },
    369   { 0x43, 0x63 },
    370   { 0x44, 0x64 },
    371   { 0x45, 0x65 },
    372   { 0x46, 0x66 },
    373   { 0x47, 0x67 },
    374   { 0x48, 0x68 },
    375   { 0x49, 0x69 },
    376   { 0x4a, 0x6a },
    377   { 0x4b, 0x6b },
    378   { 0x4c, 0x6c },
    379   { 0x4d, 0x6d },
    380   { 0x4e, 0x6e },
    381   { 0x4f, 0x6f },
    382   { 0x50, 0x70 },
    383   { 0x51, 0x71 },
    384   { 0x52, 0x72 },
    385   { 0x53, 0x73 },
    386   { 0x54, 0x74 },
    387   { 0x55, 0x75 },
    388   { 0x56, 0x76 },
    389   { 0x57, 0x77 },
    390   { 0x58, 0x78 },
    391   { 0x59, 0x79 },
    392   { 0x5a, 0x7a }
    393 };
    394 
    395 extern int
    396 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
    397 				  OnigApplyAllCaseFoldFunc f, void* arg)
    398 {
    399   OnigCodePoint code;
    400   int i, r;
    401 
    402   for (i = 0;
    403        i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
    404        i++) {
    405     code = OnigAsciiLowerMap[i].to;
    406     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
    407     if (r != 0) return r;
    408 
    409     code = OnigAsciiLowerMap[i].from;
    410     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
    411     if (r != 0) return r;
    412   }
    413 
    414   return 0;
    415 }
    416 
    417 extern int
    418 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
    419 	 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
    420 	 OnigCaseFoldCodeItem items[])
    421 {
    422   if (0x41 <= *p && *p <= 0x5a) {
    423     items[0].byte_len = 1;
    424     items[0].code_len = 1;
    425     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
    426     return 1;
    427   }
    428   else if (0x61 <= *p && *p <= 0x7a) {
    429     items[0].byte_len = 1;
    430     items[0].code_len = 1;
    431     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
    432     return 1;
    433   }
    434   else
    435     return 0;
    436 }
    437 
    438 static int
    439 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
    440 		       OnigApplyAllCaseFoldFunc f, void* arg)
    441 {
    442   static OnigCodePoint ss[] = { 0x73, 0x73 };
    443 
    444   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
    445 }
    446 
    447 extern int
    448 onigenc_apply_all_case_fold_with_map(int map_size,
    449     const OnigPairCaseFoldCodes map[],
    450     int ess_tsett_flag, OnigCaseFoldType flag,
    451     OnigApplyAllCaseFoldFunc f, void* arg)
    452 {
    453   OnigCodePoint code;
    454   int i, r;
    455 
    456   r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
    457   if (r != 0) return r;
    458 
    459   for (i = 0; i < map_size; i++) {
    460     code = map[i].to;
    461     r = (*f)(map[i].from, &code, 1, arg);
    462     if (r != 0) return r;
    463 
    464     code = map[i].from;
    465     r = (*f)(map[i].to, &code, 1, arg);
    466     if (r != 0) return r;
    467   }
    468 
    469   if (ess_tsett_flag != 0)
    470     return ss_apply_all_case_fold(flag, f, arg);
    471 
    472   return 0;
    473 }
    474 
    475 extern int
    476 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
    477     const OnigPairCaseFoldCodes map[],
    478     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
    479     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
    480 {
    481   if (0x41 <= *p && *p <= 0x5a) {
    482     items[0].byte_len = 1;
    483     items[0].code_len = 1;
    484     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
    485     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
    486 	&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
    487       /* SS */
    488       items[1].byte_len = 2;
    489       items[1].code_len = 1;
    490       items[1].code[0] = (OnigCodePoint )0xdf;
    491       return 2;
    492     }
    493     else
    494       return 1;
    495   }
    496   else if (0x61 <= *p && *p <= 0x7a) {
    497     items[0].byte_len = 1;
    498     items[0].code_len = 1;
    499     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
    500     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
    501 	&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
    502       /* ss */
    503       items[1].byte_len = 2;
    504       items[1].code_len = 1;
    505       items[1].code[0] = (OnigCodePoint )0xdf;
    506       return 2;
    507     }
    508     else
    509       return 1;
    510   }
    511   else if (*p == 0xdf && ess_tsett_flag != 0) {
    512     items[0].byte_len = 1;
    513     items[0].code_len = 2;
    514     items[0].code[0] = (OnigCodePoint )'s';
    515     items[0].code[1] = (OnigCodePoint )'s';
    516 
    517     items[1].byte_len = 1;
    518     items[1].code_len = 2;
    519     items[1].code[0] = (OnigCodePoint )'S';
    520     items[1].code[1] = (OnigCodePoint )'S';
    521 
    522     items[2].byte_len = 1;
    523     items[2].code_len = 2;
    524     items[2].code[0] = (OnigCodePoint )'s';
    525     items[2].code[1] = (OnigCodePoint )'S';
    526 
    527     items[3].byte_len = 1;
    528     items[3].code_len = 2;
    529     items[3].code[0] = (OnigCodePoint )'S';
    530     items[3].code[1] = (OnigCodePoint )'s';
    531 
    532     return 4;
    533   }
    534   else {
    535     int i;
    536 
    537     for (i = 0; i < map_size; i++) {
    538       if (*p == map[i].from) {
    539 	items[0].byte_len = 1;
    540 	items[0].code_len = 1;
    541 	items[0].code[0] = map[i].to;
    542 	return 1;
    543       }
    544       else if (*p == map[i].to) {
    545 	items[0].byte_len = 1;
    546 	items[0].code_len = 1;
    547 	items[0].code[0] = map[i].from;
    548 	return 1;
    549       }
    550     }
    551   }
    552 
    553   return 0;
    554 }
    555 
    556 
    557 extern int
    558 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
    559 	 OnigCodePoint* sb_out ARG_UNUSED,
    560 	 const OnigCodePoint* ranges[] ARG_UNUSED)
    561 {
    562   return ONIG_NO_SUPPORT_CONFIG;
    563 }
    564 
    565 extern int
    566 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
    567 {
    568   if (p < end) {
    569     if (*p == 0x0a) return 1;
    570   }
    571   return 0;
    572 }
    573 
    574 /* for single byte encodings */
    575 extern int
    576 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
    577 	    const UChar*end ARG_UNUSED, UChar* lower)
    578 {
    579   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
    580 
    581   (*p)++;
    582   return 1; /* return byte length of converted char to lower */
    583 }
    584 
    585 #if 0
        /* Disabled by the enclosing "#if 0": this definition is never compiled.
         * As written it advances *pp by one byte and returns
         * ONIGENC_IS_ASCII_CODE_CASE_AMBIG for the byte that was at the old
         * position; flag and end are not used. */
    586 extern int
    587 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
    588 			       const UChar** pp, const UChar* end)
    589 {
    590   const UChar* p = *pp;
    591 
    592   (*pp)++;
    593   return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
    594 }
    595 #endif
    596 
    597 extern int
    598 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
    599 {
    600   return 1;
    601 }
    602 
    603 extern OnigCodePoint
    604 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
    605 {
    606   return (OnigCodePoint )(*p);
    607 }
    608 
    609 extern int
    610 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
    611 {
    612   return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
    613 }
    614 
    615 extern int
    616 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
    617 {
    618   *buf = (UChar )(code & 0xff);
    619   return 1;
    620 }
    621 
    622 extern UChar*
    623 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
    624 					  const UChar* s)
    625 {
    626   return (UChar* )s;
    627 }
    628 
    629 extern int
    630 onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
    631 					     const UChar* end ARG_UNUSED)
    632 {
    633   return TRUE;
    634 }
    635 
    636 extern int
    637 onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
    638 					      const UChar* end ARG_UNUSED)
    639 {
    640   return FALSE;
    641 }
    642 
    643 extern OnigCodePoint
    644 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
    645 {
    646   int c, i, len;
    647   OnigCodePoint n;
    648 
    649   len = enclen(enc, p);
    650   n = (OnigCodePoint )(*p++);
    651   if (len == 1) return n;
    652 
    653   for (i = 1; i < len; i++) {
    654     if (p >= end) break;
    655     c = *p++;
    656     n <<= 8;  n += c;
    657   }
    658   return n;
    659 }
    660 
    661 extern int
    662 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
    663                           const UChar** pp, const UChar* end ARG_UNUSED,
    664 			  UChar* lower)
    665 {
    666   int len;
    667   const UChar *p = *pp;
    668 
    669   if (ONIGENC_IS_MBC_ASCII(p)) {
    670     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    671     (*pp)++;
    672     return 1;
    673   }
    674   else {
    675     int i;
    676 
    677     len = enclen(enc, p);
    678     for (i = 0; i < len; i++) {
    679       *lower++ = *p++;
    680     }
    681     (*pp) += len;
    682     return len; /* return byte length of converted to lower char */
    683   }
    684 }
    685 
    686 #if 0
        /* Disabled by the enclosing "#if 0": this definition is never compiled.
         * As written, for a byte satisfying ONIGENC_IS_MBC_ASCII it advances
         * *pp by one and returns ONIGENC_IS_ASCII_CODE_CASE_AMBIG for that
         * byte; otherwise it advances *pp by enclen(enc, p) and returns FALSE.
         * flag and end are not used. */
    687 extern int
    688 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
    689                              const UChar** pp, const UChar* end)
    690 {
    691   const UChar* p = *pp;
    692 
    693   if (ONIGENC_IS_MBC_ASCII(p)) {
    694     (*pp)++;
    695     return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
    696   }
    697 
    698   (*pp) += enclen(enc, p);
    699   return FALSE;
    700 }
    701 #endif
    702 
    703 extern int
    704 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
    705 {
    706   if ((code & 0xff00) != 0) return 2;
    707   else return 1;
    708 }
    709 
    710 extern int
    711 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
    712 {
    713        if ((code & 0xff000000) != 0) return 4;
    714   else if ((code & 0xff0000) != 0) return 3;
    715   else if ((code & 0xff00) != 0) return 2;
    716   else return 1;
    717 }
    718 
    719 extern int
    720 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
    721 {
    722   UChar *p = buf;
    723 
    724   if ((code & 0xff00) != 0) {
    725     *p++ = (UChar )((code >>  8) & 0xff);
    726   }
    727   *p++ = (UChar )(code & 0xff);
    728 
    729 #if 1
    730   if (enclen(enc, buf) != (p - buf))
    731     return ONIGERR_INVALID_CODE_POINT_VALUE;
    732 #endif
    733   return (int)(p - buf);
    734 }
    735 
    736 extern int
    737 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
    738 {
    739   UChar *p = buf;
    740 
    741   if ((code & 0xff000000) != 0) {
    742     *p++ = (UChar )((code >> 24) & 0xff);
    743   }
    744   if ((code & 0xff0000) != 0 || p != buf) {
    745     *p++ = (UChar )((code >> 16) & 0xff);
    746   }
    747   if ((code & 0xff00) != 0 || p != buf) {
    748     *p++ = (UChar )((code >> 8) & 0xff);
    749   }
    750   *p++ = (UChar )(code & 0xff);
    751 
    752 #if 1
    753   if (enclen(enc, buf) != (p - buf))
    754     return ONIGERR_INVALID_CODE_POINT_VALUE;
    755 #endif
    756   return (int)(p - buf);
    757 }
    758 
    759 extern int
    760 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
    761 {
    762   static PosixBracketEntryType PBS[] = {
    763     { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
    764     { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
    765     { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
    766     { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
    767     { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
    768     { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
    769     { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
    770     { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
    771     { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
    772     { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
    773     { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
    774     { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
    775     { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
    776     { (UChar* )"Word",   ONIGENC_CTYPE_WORD,   4 },
    777     { (UChar* )NULL, -1, 0 }
    778   };
    779 
    780   PosixBracketEntryType *pb;
    781   int len;
    782 
    783   len = onigenc_strlen(enc, p, end);
    784   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
    785     if (len == pb->len &&
    786         onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
    787       return pb->ctype;
    788   }
    789 
    790   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
    791 }
    792 
    793 extern int
    794 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
    795 			  unsigned int ctype)
    796 {
    797   if (code < 128)
    798     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
    799   else {
    800     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
    801       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
    802     }
    803   }
    804 
    805   return FALSE;
    806 }
    807 
    808 extern int
    809 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
    810 			  unsigned int ctype)
    811 {
    812   if (code < 128)
    813     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
    814   else {
    815     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
    816       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
    817     }
    818   }
    819 
    820   return FALSE;
    821 }
    822 
    823 extern int
    824 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
    825                            const UChar* sascii /* ascii */, int n)
    826 {
    827   int x, c;
    828 
    829   while (n-- > 0) {
    830     if (p >= end) return (int )(*sascii);
    831 
    832     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
    833     x = *sascii - c;
    834     if (x) return x;
    835 
    836     sascii++;
    837     p += enclen(enc, p);
    838   }
    839   return 0;
    840 }
    841 
    842 /* Property management */
    843 static int
    844 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
    845 {
    846   int size;
    847   const OnigCodePoint **list = *plist;
    848 
    849   size = sizeof(OnigCodePoint*) * new_size;
    850   if (IS_NULL(list)) {
    851     list = (const OnigCodePoint** )xmalloc(size);
    852   }
    853   else {
    854     list = (const OnigCodePoint** )xrealloc((void* )list, size, *psize * sizeof(OnigCodePoint*));
    855   }
    856 
    857   if (IS_NULL(list)) return ONIGERR_MEMORY;
    858 
    859   *plist = list;
    860   *psize = new_size;
    861 
    862   return 0;
    863 }
    864 
    865 extern int
    866 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
    867      hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
    868      int *psize)
    869 {
    870 #define PROP_INIT_SIZE     16
    871 
    872   int r;
    873 
    874   if (*psize <= *pnum) {
    875     int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
    876     r = resize_property_list(new_size, plist, psize);
    877     if (r != 0) return r;
    878   }
    879 
    880   (*plist)[*pnum] = prop;
    881 
    882   if (ONIG_IS_NULL(*table)) {
    883     *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
    884     if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
    885   }
    886 
    887   *pnum = *pnum + 1;
    888   onig_st_insert_strend(*table, name, name + strlen_s((char* )name, MAX_STRING_SIZE),
    889 			(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
    890   return 0;
    891 }
    892 
    893 extern int
    894 onigenc_property_list_init(int (*f)(void))
    895 {
    896   int r;
    897 
    898   THREAD_ATOMIC_START;
    899 
    900   r = f();
    901 
    902   THREAD_ATOMIC_END;
    903   return r;
    904 }
    905