/* Regular expression tests.
   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub (at) redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301 USA.  */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>
#ifdef HAVE_MCHECK_H
#include <mcheck.h>
#endif
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>

/* Tests supposed to match.

   Each entry gives the pattern to compile, the subject string, the
   flags handed to regcomp, the number of match slots (NMATCH) to
   request from regexec and compare, and the expected offsets for
   those slots.  An expected slot of { -1, -1 } is treated by the
   checker in main as "stop comparing here": a mismatch against such
   a slot is not reported.  */
struct
{
  const char *pattern;
  const char *string;
  int flags, nmatch;
  regmatch_t rm[5];
} tests[] = {
  /* Test for newline handling in regex.  */
  { "[^~]*~", "\nx~y", 0, 2, { { 0, 3 }, { -1, -1 } } },
  /* Other tests.  */
  { "a(.*)b", "a b", REG_EXTENDED, 2, { { 0, 3 }, { 1, 2 } } },
  { ".*|\\([KIO]\\)\\([^|]*\\).*|?[KIO]", "10~.~|P|K0|I10|O16|?KSb", 0, 3,
    { { 0, 21 }, { 15, 16 }, { 16, 18 } } },
  { ".*|\\([KIO]\\)\\([^|]*\\).*|?\\1", "10~.~|P|K0|I10|O16|?KSb", 0, 3,
    { { 0, 21 }, { 8, 9 }, { 9, 10 } } },
  { "^\\(a*\\)\\1\\{9\\}\\(a\\{0,9\\}\\)\\([0-9]*;.*[^a]\\2\\([0-9]\\)\\)",
    "a1;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9aa2aa1a0", 0,
    5, { { 0, 67 }, { 0, 0 }, { 0, 1 }, { 1, 67 }, { 66, 67 } } },
  /* Test for BRE expression anchoring.  POSIX says just that this may match;
     in glibc regex it always matched, so avoid changing it.  */
  { "\\(^\\|foo\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } },
  { "\\(foo\\|^\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } },
  /* In ERE this must be treated as an anchor.  */
  { "(^|foo)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } },
  { "(foo|^)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } },
  /* Here ^ cannot be treated as an anchor according to POSIX.  */
  { "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
  { "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
  /* More tests on backreferences.  */
  { "()\\1", "x", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
  { "()x\\1", "x", REG_EXTENDED, 2, { { 0, 1 }, { 0, 0 } } },
  { "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
  { "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
  { "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
  { "(b)()c\\1", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 1 }, { 1, 1 } } },
  { "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
  { "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } },
  { "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
  { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
  { "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } },
  { "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
  { "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } },
  { "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } },
  { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } },
  { "^(.?)(.?)(.?)(.?)(.?).?\\5\\4\\3\\2\\1$",
    "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
  { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
    "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
  { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
    "abcdedcba", REG_EXTENDED, 1, { { 0, 9 } } },
#if 0
  /* XXX Not used since they fail so far.  */
  { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
    "ababababa", REG_EXTENDED, 1, { { 0, 9 } } },
  { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
    "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
  { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
    "ababababa", REG_EXTENDED, 1, { { 0, 9 } } },
#endif
};

/* Run every entry of TESTS: compile the pattern, execute it against the
   subject string and compare the reported offsets with the expected
   ones.  A diagnostic is printed on stdout for each failure.

   Returns 0 if no failure was reported, 1 otherwise.  */
int
main (void)
{
  regex_t re;
  regmatch_t rm[5];
  size_t i;
  int n, ret = 0;

#ifdef HAVE_MCHECK_H
  mtrace ();
#endif

  for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
    {
      n = regcomp (&re, tests[i].pattern, tests[i].flags);
      if (n != 0)
	{
	  char buf[500];
	  regerror (n, &re, buf, sizeof (buf));
	  /* size_t need not be unsigned long; convert explicitly so the
	     argument matches the %lu conversion.  */
	  printf ("%s: regcomp %lu failed: %s\n", tests[i].pattern,
		  (unsigned long) i, buf);
	  ret = 1;
	  /* Nothing was compiled, so there is nothing to regfree.  */
	  continue;
	}

      if (regexec (&re, tests[i].string, tests[i].nmatch, rm, 0))
	{
	  printf ("%s: regexec %lu failed\n", tests[i].pattern,
		  (unsigned long) i);
	  ret = 1;
	  regfree (&re);
	  continue;
	}

      for (n = 0; n < tests[i].nmatch; ++n)
	if (rm[n].rm_so != tests[i].rm[n].rm_so
	    || rm[n].rm_eo != tests[i].rm[n].rm_eo)
	  {
	    /* An expected { -1, -1 } slot ends the comparison without
	       counting the difference as a failure.  */
	    if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1)
	      break;
	    /* regoff_t is only guaranteed to be a signed integer type and
	       may be wider than int; print it as long.  */
	    printf ("%s: regexec %lu match failure rm[%d] %ld..%ld\n",
		    tests[i].pattern, (unsigned long) i, n,
		    (long) rm[n].rm_so, (long) rm[n].rm_eo);
	    ret = 1;
	    break;
	  }

      regfree (&re);
    }

  return ret;
}