1 /********************************************************************** 2 regposix.c - Oniguruma (regular expression library) 3 **********************************************************************/ 4 /*- 5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * All rights reserved. 7 * 8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #define regex_t onig_regex_t 33 #include "regint.h" 34 #undef regex_t 35 #include "onigposix.h" 36 37 #define ONIG_C(reg) ((onig_regex_t* )((reg)->onig)) 38 #define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig)) 39 40 /* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */ 41 #define ENC_STRING_LEN(enc,s,len) do { \ 42 if (ONIGENC_MBC_MINLEN(enc) == 1) { \ 43 UChar* tmps = (UChar* )(s); \ 44 while (*tmps != 0) tmps++; \ 45 len = (int)(tmps - (UChar* )(s)); \ 46 } \ 47 else { \ 48 len = onigenc_str_bytelen_null(enc, (UChar* )s); \ 49 } \ 50 } while(0) 51 52 typedef struct { 53 int onig_err; 54 int posix_err; 55 } O2PERR; 56 57 static int 58 onig2posix_error_code(int code) 59 { 60 static const O2PERR o2p[] = { 61 { ONIG_MISMATCH, REG_NOMATCH }, 62 { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, 63 { ONIGERR_MEMORY, REG_ESPACE }, 64 { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, 65 { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, 66 { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, 67 { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, 68 { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, 69 { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, 70 { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, 71 { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, 72 { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG }, 73 { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, 74 { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, 75 { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, 76 { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, 77 { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE }, 78 { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE }, 79 { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, 80 { ONIGERR_META_CODE_SYNTAX, REG_BADPAT }, 81 { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, 82 { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, 83 { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, 84 { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, 85 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, 86 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, 87 { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, 88 { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, 89 { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, 90 { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, 91 { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, 92 { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, 93 { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, 94 { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, 95 { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT }, 96 { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, 97 { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, 98 { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, 99 { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, 100 { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, 101 { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, 102 { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, 103 { ONIGERR_INVALID_BACKREF, REG_ESUBREG }, 104 { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT }, 105 { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, 106 { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, 107 { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC }, 108 { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT }, 109 { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT }, 110 { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT }, 111 { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, 112 { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, 113 { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT }, 114 { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, 115 { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, 116 { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT }, 117 { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT }, 118 { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG }, 119 { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } 120 121 }; 122 123 int i; 124 125 if (code >= 0) return 0; 126 127 for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) { 128 if (code == o2p[i].onig_err) 129 return o2p[i].posix_err; 130 } 131 132 return REG_EONIG_INTERNAL; /* but, unknown error code */ 133 } 134 135 extern int 136 regcomp(regex_t* reg, const char* pattern, int posix_options) 137 { 138 int r, len; 139 OnigSyntaxType* syntax = OnigDefaultSyntax; 140 OnigOptionType options; 141 142 if ((posix_options & REG_EXTENDED) == 0) 143 syntax = ONIG_SYNTAX_POSIX_BASIC; 144 145 options = syntax->options; 146 if ((posix_options & REG_ICASE) != 0) 147 ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE); 148 if ((posix_options & REG_NEWLINE) != 0) { 149 ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE); 150 ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE); 151 } 152 153 reg->comp_options = posix_options; 154 155 ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len); 156 r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), 157 options, OnigEncDefaultCharEncoding, syntax, 158 (OnigErrorInfo* )NULL); 159 if (r != ONIG_NORMAL) { 160 return onig2posix_error_code(r); 161 } 162 163 reg->re_nsub = ONIG_C(reg)->num_mem; 164 return 0; 165 } 166 167 extern int 168 regexec(regex_t* reg, const char* str, size_t nmatch, 169 regmatch_t pmatch[], int posix_options) 170 { 171 int r, i, len; 172 UChar* end; 173 regmatch_t* pm; 174 OnigOptionType options; 175 176 options = ONIG_OPTION_POSIX_REGION; 177 if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL; 178 if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL; 179 180 if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) { 181 pm = (regmatch_t* )NULL; 182 nmatch = 0; 183 } 184 else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) { 185 pm = (regmatch_t* )xmalloc(sizeof(regmatch_t) 186 * (ONIG_C(reg)->num_mem + 1)); 187 if (pm == NULL) 188 return REG_ESPACE; 189 } 190 else { 191 pm = pmatch; 192 } 193 194 ENC_STRING_LEN(ONIG_C(reg)->enc, str, len); 195 end = (UChar* )(str + len); 196 r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, 197 (OnigRegion* )pm, options); 198 199 if (r >= 0) { 200 r = 0; /* Match */ 201 if (pm != pmatch && pm != NULL) { 202 xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch); 203 } 204 } 205 else if (r == ONIG_MISMATCH) { 206 r = REG_NOMATCH; 207 for (i = 0; i < (int )nmatch; i++) 208 pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS; 209 } 210 else { 211 r = onig2posix_error_code(r); 212 } 213 214 if (pm != pmatch && pm != NULL) 215 xfree(pm); 216 217 #if 0 218 if (reg->re_nsub > nmatch - 1) 219 reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1); 220 #endif 221 222 return r; 223 } 224 225 extern void 226 regfree(regex_t* reg) 227 { 228 onig_free(ONIG_C(reg)); 229 } 230 231 232 extern void 233 reg_set_encoding(int mb_code) 234 { 235 OnigEncoding enc; 236 237 switch (mb_code) { 238 case REG_POSIX_ENCODING_ASCII: 239 enc = ONIG_ENCODING_ASCII; 240 break; 241 case REG_POSIX_ENCODING_EUC_JP: 242 enc = ONIG_ENCODING_EUC_JP; 243 break; 244 case REG_POSIX_ENCODING_SJIS: 245 enc = ONIG_ENCODING_SJIS; 246 break; 247 case REG_POSIX_ENCODING_UTF8: 248 enc = ONIG_ENCODING_UTF8; 249 break; 250 case REG_POSIX_ENCODING_UTF16_BE: 251 enc = ONIG_ENCODING_UTF16_BE; 252 break; 253 case REG_POSIX_ENCODING_UTF16_LE: 254 enc = ONIG_ENCODING_UTF16_LE; 255 break; 256 257 default: 258 return ; 259 break; 260 } 261 262 onigenc_set_default_encoding(enc); 263 } 264 265 extern int 266 reg_name_to_group_numbers(regex_t* reg, 267 const unsigned char* name, const unsigned char* name_end, int** nums) 268 { 269 return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); 270 } 271 272 typedef struct { 273 int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*); 274 regex_t* reg; 275 void* arg; 276 } i_wrap; 277 278 static int 279 i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs, 280 onig_regex_t* reg ARG_UNUSED, void* arg) 281 { 282 i_wrap* warg = (i_wrap* )arg; 283 284 return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg); 285 } 286 287 extern int 288 reg_foreach_name(regex_t* reg, 289 int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), 290 void* arg) 291 { 292 i_wrap warg; 293 294 warg.func = func; 295 warg.reg = reg; 296 warg.arg = arg; 297 298 return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg); 299 } 300 301 extern int 302 reg_number_of_names(regex_t* reg) 303 { 304 return onig_number_of_names(ONIG_C(reg)); 305 } 306