1 /********************************************************************** 2 regenc.c - Oniguruma (regular expression library) 3 **********************************************************************/ 4 /*- 5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * All rights reserved. 7 * 8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include "regint.h" 33 34 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; 35 36 extern int 37 onigenc_init(void) 38 { 39 return 0; 40 } 41 42 extern OnigEncoding 43 onigenc_get_default_encoding(void) 44 { 45 return OnigEncDefaultCharEncoding; 46 } 47 48 extern int 49 onigenc_set_default_encoding(OnigEncoding enc) 50 { 51 OnigEncDefaultCharEncoding = enc; 52 return 0; 53 } 54 55 extern UChar* 56 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) 57 { 58 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); 59 if (p < s) { 60 p += enclen(enc, p); 61 } 62 return p; 63 } 64 65 extern UChar* 66 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, 67 const UChar* start, const UChar* s, const UChar** prev) 68 { 69 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); 70 71 if (p < s) { 72 if (prev) *prev = (const UChar* )p; 73 p += enclen(enc, p); 74 } 75 else { 76 if (prev) *prev = (const UChar* )NULL; /* Sorry */ 77 } 78 return p; 79 } 80 81 extern UChar* 82 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) 83 { 84 if (s <= start) 85 return (UChar* )NULL; 86 87 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); 88 } 89 90 extern UChar* 91 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) 92 { 93 while (ONIG_IS_NOT_NULL(s) && n-- > 0) { 94 if (s <= start) 95 return (UChar* )NULL; 96 97 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); 98 } 99 return (UChar* )s; 100 } 101 102 extern UChar* 103 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) 104 { 105 UChar* q = (UChar* )p; 106 while (n-- > 0) { 107 q += ONIGENC_MBC_ENC_LEN(enc, q); 108 } 109 return (q <= end ? q : NULL); 110 } 111 112 extern int 113 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) 114 { 115 int n = 0; 116 UChar* q = (UChar* )p; 117 118 while (q < end) { 119 q += ONIGENC_MBC_ENC_LEN(enc, q); 120 n++; 121 } 122 return n; 123 } 124 125 extern int 126 onigenc_strlen_null(OnigEncoding enc, const UChar* s) 127 { 128 int n = 0; 129 UChar* p = (UChar* )s; 130 131 while (1) { 132 if (*p == '\0') { 133 UChar* q; 134 int len = ONIGENC_MBC_MINLEN(enc); 135 136 if (len == 1) return n; 137 q = p + 1; 138 while (len > 1) { 139 if (*q != '\0') break; 140 q++; 141 len--; 142 } 143 if (len == 1) return n; 144 } 145 p += ONIGENC_MBC_ENC_LEN(enc, p); 146 n++; 147 } 148 } 149 150 extern int 151 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) 152 { 153 UChar* start = (UChar* )s; 154 UChar* p = (UChar* )s; 155 156 while (1) { 157 if (*p == '\0') { 158 UChar* q; 159 int len = ONIGENC_MBC_MINLEN(enc); 160 161 if (len == 1) return (int )(p - start); 162 q = p + 1; 163 while (len > 1) { 164 if (*q != '\0') break; 165 q++; 166 len--; 167 } 168 if (len == 1) return (int )(p - start); 169 } 170 p += ONIGENC_MBC_ENC_LEN(enc, p); 171 } 172 } 173 174 const UChar OnigEncAsciiToLowerCaseTable[] = { 175 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007, 176 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, 177 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, 178 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 179 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, 180 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, 181 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, 182 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, 183 0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147, 184 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157, 185 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167, 186 0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137, 187 0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147, 188 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157, 189 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167, 190 0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177, 191 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 192 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 193 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 194 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 195 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 196 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 197 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 198 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 199 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 200 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 201 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, 202 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 203 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 204 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 205 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, 206 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377, 207 }; 208 209 #ifdef USE_UPPER_CASE_TABLE 210 const UChar OnigEncAsciiToUpperCaseTable[256] = { 211 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007, 212 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, 213 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, 214 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 215 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, 216 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, 217 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, 218 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, 219 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 220 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 221 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 222 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, 223 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 224 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 225 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 226 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, 227 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 228 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 229 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 230 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 231 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 232 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 233 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 234 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 235 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 236 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 237 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, 238 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 239 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 240 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 241 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, 242 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377, 243 }; 244 #endif 245 246 const unsigned short OnigEncAsciiCtypeTable[256] = { 247 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 248 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 249 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 250 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 251 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 252 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 253 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 254 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 255 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 256 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 257 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 258 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 259 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 260 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 261 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 262 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 263 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 264 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 265 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 266 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 267 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 268 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 269 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 270 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 271 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 272 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 273 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 274 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 275 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 276 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 277 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 278 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 279 }; 280 281 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { 282 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007, 283 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, 284 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, 285 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 286 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, 287 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, 288 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, 289 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, 290 0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147, 291 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157, 292 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167, 293 0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137, 294 0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147, 295 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157, 296 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167, 297 0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177, 298 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 299 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 300 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 301 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 302 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 303 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 304 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 305 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 306 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 307 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 308 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0327, 309 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0337, 310 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 311 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 312 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, 313 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 314 }; 315 316 #ifdef USE_UPPER_CASE_TABLE 317 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { 318 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007, 319 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, 320 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, 321 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 322 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, 323 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, 324 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, 325 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, 326 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 327 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 328 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 329 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, 330 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 331 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 332 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 333 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, 334 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 335 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 336 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 337 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 338 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 339 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 340 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 341 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 342 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 343 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 344 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, 345 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 346 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 347 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 348 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0367, 349 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0377, 350 }; 351 #endif 352 353 extern void 354 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) 355 { 356 /* nothing */ 357 /* obsoleted. */ 358 } 359 360 extern UChar* 361 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) 362 { 363 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); 364 } 365 366 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { 367 { 0x41, 0x61 }, 368 { 0x42, 0x62 }, 369 { 0x43, 0x63 }, 370 { 0x44, 0x64 }, 371 { 0x45, 0x65 }, 372 { 0x46, 0x66 }, 373 { 0x47, 0x67 }, 374 { 0x48, 0x68 }, 375 { 0x49, 0x69 }, 376 { 0x4a, 0x6a }, 377 { 0x4b, 0x6b }, 378 { 0x4c, 0x6c }, 379 { 0x4d, 0x6d }, 380 { 0x4e, 0x6e }, 381 { 0x4f, 0x6f }, 382 { 0x50, 0x70 }, 383 { 0x51, 0x71 }, 384 { 0x52, 0x72 }, 385 { 0x53, 0x73 }, 386 { 0x54, 0x74 }, 387 { 0x55, 0x75 }, 388 { 0x56, 0x76 }, 389 { 0x57, 0x77 }, 390 { 0x58, 0x78 }, 391 { 0x59, 0x79 }, 392 { 0x5a, 0x7a } 393 }; 394 395 extern int 396 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, 397 OnigApplyAllCaseFoldFunc f, void* arg) 398 { 399 OnigCodePoint code; 400 int i, r; 401 402 for (i = 0; 403 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); 404 i++) { 405 code = OnigAsciiLowerMap[i].to; 406 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); 407 if (r != 0) return r; 408 409 code = OnigAsciiLowerMap[i].from; 410 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); 411 if (r != 0) return r; 412 } 413 414 return 0; 415 } 416 417 extern int 418 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, 419 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, 420 OnigCaseFoldCodeItem items[]) 421 { 422 if (0x41 <= *p && *p <= 0x5a) { 423 items[0].byte_len = 1; 424 items[0].code_len = 1; 425 items[0].code[0] = (OnigCodePoint )(*p + 0x20); 426 return 1; 427 } 428 else if (0x61 <= *p && *p <= 0x7a) { 429 items[0].byte_len = 1; 430 items[0].code_len = 1; 431 items[0].code[0] = (OnigCodePoint )(*p - 0x20); 432 return 1; 433 } 434 else 435 return 0; 436 } 437 438 static int 439 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, 440 OnigApplyAllCaseFoldFunc f, void* arg) 441 { 442 static OnigCodePoint ss[] = { 0x73, 0x73 }; 443 444 return (*f)((OnigCodePoint )0xdf, ss, 2, arg); 445 } 446 447 extern int 448 onigenc_apply_all_case_fold_with_map(int map_size, 449 const OnigPairCaseFoldCodes map[], 450 int ess_tsett_flag, OnigCaseFoldType flag, 451 OnigApplyAllCaseFoldFunc f, void* arg) 452 { 453 OnigCodePoint code; 454 int i, r; 455 456 r = onigenc_ascii_apply_all_case_fold(flag, f, arg); 457 if (r != 0) return r; 458 459 for (i = 0; i < map_size; i++) { 460 code = map[i].to; 461 r = (*f)(map[i].from, &code, 1, arg); 462 if (r != 0) return r; 463 464 code = map[i].from; 465 r = (*f)(map[i].to, &code, 1, arg); 466 if (r != 0) return r; 467 } 468 469 if (ess_tsett_flag != 0) 470 return ss_apply_all_case_fold(flag, f, arg); 471 472 return 0; 473 } 474 475 extern int 476 onigenc_get_case_fold_codes_by_str_with_map(int map_size, 477 const OnigPairCaseFoldCodes map[], 478 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, 479 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 480 { 481 if (0x41 <= *p && *p <= 0x5a) { 482 items[0].byte_len = 1; 483 items[0].code_len = 1; 484 items[0].code[0] = (OnigCodePoint )(*p + 0x20); 485 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 486 && (*(p+1) == 0x53 || *(p+1) == 0x73)) { 487 /* SS */ 488 items[1].byte_len = 2; 489 items[1].code_len = 1; 490 items[1].code[0] = (OnigCodePoint )0xdf; 491 return 2; 492 } 493 else 494 return 1; 495 } 496 else if (0x61 <= *p && *p <= 0x7a) { 497 items[0].byte_len = 1; 498 items[0].code_len = 1; 499 items[0].code[0] = (OnigCodePoint )(*p - 0x20); 500 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 501 && (*(p+1) == 0x73 || *(p+1) == 0x53)) { 502 /* ss */ 503 items[1].byte_len = 2; 504 items[1].code_len = 1; 505 items[1].code[0] = (OnigCodePoint )0xdf; 506 return 2; 507 } 508 else 509 return 1; 510 } 511 else if (*p == 0xdf && ess_tsett_flag != 0) { 512 items[0].byte_len = 1; 513 items[0].code_len = 2; 514 items[0].code[0] = (OnigCodePoint )'s'; 515 items[0].code[1] = (OnigCodePoint )'s'; 516 517 items[1].byte_len = 1; 518 items[1].code_len = 2; 519 items[1].code[0] = (OnigCodePoint )'S'; 520 items[1].code[1] = (OnigCodePoint )'S'; 521 522 items[2].byte_len = 1; 523 items[2].code_len = 2; 524 items[2].code[0] = (OnigCodePoint )'s'; 525 items[2].code[1] = (OnigCodePoint )'S'; 526 527 items[3].byte_len = 1; 528 items[3].code_len = 2; 529 items[3].code[0] = (OnigCodePoint )'S'; 530 items[3].code[1] = (OnigCodePoint )'s'; 531 532 return 4; 533 } 534 else { 535 int i; 536 537 for (i = 0; i < map_size; i++) { 538 if (*p == map[i].from) { 539 items[0].byte_len = 1; 540 items[0].code_len = 1; 541 items[0].code[0] = map[i].to; 542 return 1; 543 } 544 else if (*p == map[i].to) { 545 items[0].byte_len = 1; 546 items[0].code_len = 1; 547 items[0].code[0] = map[i].from; 548 return 1; 549 } 550 } 551 } 552 553 return 0; 554 } 555 556 557 extern int 558 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, 559 OnigCodePoint* sb_out ARG_UNUSED, 560 const OnigCodePoint* ranges[] ARG_UNUSED) 561 { 562 return ONIG_NO_SUPPORT_CONFIG; 563 } 564 565 extern int 566 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) 567 { 568 if (p < end) { 569 if (*p == 0x0a) return 1; 570 } 571 return 0; 572 } 573 574 /* for single byte encodings */ 575 extern int 576 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, 577 const UChar*end ARG_UNUSED, UChar* lower) 578 { 579 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); 580 581 (*p)++; 582 return 1; /* return byte length of converted char to lower */ 583 } 584 585 #if 0 586 extern int 587 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, 588 const UChar** pp, const UChar* end) 589 { 590 const UChar* p = *pp; 591 592 (*pp)++; 593 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); 594 } 595 #endif 596 597 extern int 598 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED) 599 { 600 return 1; 601 } 602 603 extern OnigCodePoint 604 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 605 { 606 return (OnigCodePoint )(*p); 607 } 608 609 extern int 610 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED) 611 { 612 return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE); 613 } 614 615 extern int 616 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) 617 { 618 *buf = (UChar )(code & 0xff); 619 return 1; 620 } 621 622 extern UChar* 623 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, 624 const UChar* s) 625 { 626 return (UChar* )s; 627 } 628 629 extern int 630 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, 631 const UChar* end ARG_UNUSED) 632 { 633 return TRUE; 634 } 635 636 extern int 637 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, 638 const UChar* end ARG_UNUSED) 639 { 640 return FALSE; 641 } 642 643 extern OnigCodePoint 644 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) 645 { 646 int c, i, len; 647 OnigCodePoint n; 648 649 len = enclen(enc, p); 650 n = (OnigCodePoint )(*p++); 651 if (len == 1) return n; 652 653 for (i = 1; i < len; i++) { 654 if (p >= end) break; 655 c = *p++; 656 n <<= 8; n += c; 657 } 658 return n; 659 } 660 661 extern int 662 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, 663 const UChar** pp, const UChar* end ARG_UNUSED, 664 UChar* lower) 665 { 666 int len; 667 const UChar *p = *pp; 668 669 if (ONIGENC_IS_MBC_ASCII(p)) { 670 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 671 (*pp)++; 672 return 1; 673 } 674 else { 675 int i; 676 677 len = enclen(enc, p); 678 for (i = 0; i < len; i++) { 679 *lower++ = *p++; 680 } 681 (*pp) += len; 682 return len; /* return byte length of converted to lower char */ 683 } 684 } 685 686 #if 0 687 extern int 688 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, 689 const UChar** pp, const UChar* end) 690 { 691 const UChar* p = *pp; 692 693 if (ONIGENC_IS_MBC_ASCII(p)) { 694 (*pp)++; 695 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); 696 } 697 698 (*pp) += enclen(enc, p); 699 return FALSE; 700 } 701 #endif 702 703 extern int 704 onigenc_mb2_code_to_mbclen(OnigCodePoint code) 705 { 706 if ((code & 0xff00) != 0) return 2; 707 else return 1; 708 } 709 710 extern int 711 onigenc_mb4_code_to_mbclen(OnigCodePoint code) 712 { 713 if ((code & 0xff000000) != 0) return 4; 714 else if ((code & 0xff0000) != 0) return 3; 715 else if ((code & 0xff00) != 0) return 2; 716 else return 1; 717 } 718 719 extern int 720 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) 721 { 722 UChar *p = buf; 723 724 if ((code & 0xff00) != 0) { 725 *p++ = (UChar )((code >> 8) & 0xff); 726 } 727 *p++ = (UChar )(code & 0xff); 728 729 #if 1 730 if (enclen(enc, buf) != (p - buf)) 731 return ONIGERR_INVALID_CODE_POINT_VALUE; 732 #endif 733 return (int)(p - buf); 734 } 735 736 extern int 737 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) 738 { 739 UChar *p = buf; 740 741 if ((code & 0xff000000) != 0) { 742 *p++ = (UChar )((code >> 24) & 0xff); 743 } 744 if ((code & 0xff0000) != 0 || p != buf) { 745 *p++ = (UChar )((code >> 16) & 0xff); 746 } 747 if ((code & 0xff00) != 0 || p != buf) { 748 *p++ = (UChar )((code >> 8) & 0xff); 749 } 750 *p++ = (UChar )(code & 0xff); 751 752 #if 1 753 if (enclen(enc, buf) != (p - buf)) 754 return ONIGERR_INVALID_CODE_POINT_VALUE; 755 #endif 756 return (int)(p - buf); 757 } 758 759 extern int 760 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) 761 { 762 static PosixBracketEntryType PBS[] = { 763 { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, 764 { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, 765 { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, 766 { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, 767 { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, 768 { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, 769 { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, 770 { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, 771 { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, 772 { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, 773 { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, 774 { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, 775 { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, 776 { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, 777 { (UChar* )NULL, -1, 0 } 778 }; 779 780 PosixBracketEntryType *pb; 781 int len; 782 783 len = onigenc_strlen(enc, p, end); 784 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { 785 if (len == pb->len && 786 onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) 787 return pb->ctype; 788 } 789 790 return ONIGERR_INVALID_CHAR_PROPERTY_NAME; 791 } 792 793 extern int 794 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, 795 unsigned int ctype) 796 { 797 if (code < 128) 798 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 799 else { 800 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 801 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); 802 } 803 } 804 805 return FALSE; 806 } 807 808 extern int 809 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, 810 unsigned int ctype) 811 { 812 if (code < 128) 813 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 814 else { 815 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 816 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); 817 } 818 } 819 820 return FALSE; 821 } 822 823 extern int 824 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, 825 const UChar* sascii /* ascii */, int n) 826 { 827 int x, c; 828 829 while (n-- > 0) { 830 if (p >= end) return (int )(*sascii); 831 832 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); 833 x = *sascii - c; 834 if (x) return x; 835 836 sascii++; 837 p += enclen(enc, p); 838 } 839 return 0; 840 } 841 842 /* Property management */ 843 static int 844 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) 845 { 846 int size; 847 const OnigCodePoint **list = *plist; 848 849 size = sizeof(OnigCodePoint*) * new_size; 850 if (IS_NULL(list)) { 851 list = (const OnigCodePoint** )xmalloc(size); 852 } 853 else { 854 list = (const OnigCodePoint** )xrealloc((void* )list, size, *psize * sizeof(OnigCodePoint*)); 855 } 856 857 if (IS_NULL(list)) return ONIGERR_MEMORY; 858 859 *plist = list; 860 *psize = new_size; 861 862 return 0; 863 } 864 865 extern int 866 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, 867 hash_table_type **table, const OnigCodePoint*** plist, int *pnum, 868 int *psize) 869 { 870 #define PROP_INIT_SIZE 16 871 872 int r; 873 874 if (*psize <= *pnum) { 875 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2); 876 r = resize_property_list(new_size, plist, psize); 877 if (r != 0) return r; 878 } 879 880 (*plist)[*pnum] = prop; 881 882 if (ONIG_IS_NULL(*table)) { 883 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); 884 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; 885 } 886 887 *pnum = *pnum + 1; 888 onig_st_insert_strend(*table, name, name + strlen_s((char* )name, MAX_STRING_SIZE), 889 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); 890 return 0; 891 } 892 893 extern int 894 onigenc_property_list_init(int (*f)(void)) 895 { 896 int r; 897 898 THREAD_ATOMIC_START; 899 900 r = f(); 901 902 THREAD_ATOMIC_END; 903 return r; 904 } 905