1 /* 2 * testRegexp.c: simple module for testing regular expressions 3 * 4 * See Copyright for the status of this software. 5 * 6 * Daniel Veillard <veillard (at) redhat.com> 7 */ 8 9 #include "libxml.h" 10 #ifdef LIBXML_REGEXP_ENABLED 11 #include <string.h> 12 13 #include <libxml/tree.h> 14 #include <libxml/xmlregexp.h> 15 16 static int repeat = 0; 17 static int debug = 0; 18 19 static void testRegexp(xmlRegexpPtr comp, const char *value) { 20 int ret; 21 22 ret = xmlRegexpExec(comp, (const xmlChar *) value); 23 if (ret == 1) 24 printf("%s: Ok\n", value); 25 else if (ret == 0) 26 printf("%s: Fail\n", value); 27 else 28 printf("%s: Error: %d\n", value, ret); 29 if (repeat) { 30 int j; 31 for (j = 0;j < 999999;j++) 32 xmlRegexpExec(comp, (const xmlChar *) value); 33 } 34 } 35 36 static void 37 testRegexpFile(const char *filename) { 38 xmlRegexpPtr comp = NULL; 39 FILE *input; 40 char expression[5000]; 41 int len; 42 43 input = fopen(filename, "r"); 44 if (input == NULL) { 45 xmlGenericError(xmlGenericErrorContext, 46 "Cannot open %s for reading\n", filename); 47 return; 48 } 49 while (fgets(expression, 4500, input) != NULL) { 50 len = strlen(expression); 51 len--; 52 while ((len >= 0) && 53 ((expression[len] == '\n') || (expression[len] == '\t') || 54 (expression[len] == '\r') || (expression[len] == ' '))) len--; 55 expression[len + 1] = 0; 56 if (len >= 0) { 57 if (expression[0] == '#') 58 continue; 59 if ((expression[0] == '=') && (expression[1] == '>')) { 60 char *pattern = &expression[2]; 61 62 if (comp != NULL) { 63 xmlRegFreeRegexp(comp); 64 comp = NULL; 65 } 66 printf("Regexp: %s\n", pattern) ; 67 comp = xmlRegexpCompile((const xmlChar *) pattern); 68 if (comp == NULL) { 69 printf(" failed to compile\n"); 70 break; 71 } 72 } else if (comp == NULL) { 73 printf("Regexp: %s\n", expression) ; 74 comp = xmlRegexpCompile((const xmlChar *) expression); 75 if (comp == NULL) { 76 printf(" failed to compile\n"); 77 break; 78 } 79 } else if (comp != NULL) { 80 testRegexp(comp, expression); 81 } 82 } 83 } 84 fclose(input); 85 if (comp != NULL) 86 xmlRegFreeRegexp(comp); 87 } 88 89 #ifdef LIBXML_EXPR_ENABLED 90 static void 91 runFileTest(xmlExpCtxtPtr ctxt, const char *filename) { 92 xmlExpNodePtr expr = NULL, sub; 93 FILE *input; 94 char expression[5000]; 95 int len; 96 97 input = fopen(filename, "r"); 98 if (input == NULL) { 99 xmlGenericError(xmlGenericErrorContext, 100 "Cannot open %s for reading\n", filename); 101 return; 102 } 103 while (fgets(expression, 4500, input) != NULL) { 104 len = strlen(expression); 105 len--; 106 while ((len >= 0) && 107 ((expression[len] == '\n') || (expression[len] == '\t') || 108 (expression[len] == '\r') || (expression[len] == ' '))) len--; 109 expression[len + 1] = 0; 110 if (len >= 0) { 111 if (expression[0] == '#') 112 continue; 113 if ((expression[0] == '=') && (expression[1] == '>')) { 114 char *str = &expression[2]; 115 116 if (expr != NULL) { 117 xmlExpFree(ctxt, expr); 118 if (xmlExpCtxtNbNodes(ctxt) != 0) 119 printf(" Parse/free of Expression leaked %d\n", 120 xmlExpCtxtNbNodes(ctxt)); 121 expr = NULL; 122 } 123 printf("Expression: %s\n", str) ; 124 expr = xmlExpParse(ctxt, str); 125 if (expr == NULL) { 126 printf(" parsing Failed\n"); 127 break; 128 } 129 } else if (expr != NULL) { 130 int expect = -1; 131 int nodes1, nodes2; 132 133 if (expression[0] == '0') 134 expect = 0; 135 if (expression[0] == '1') 136 expect = 1; 137 printf("Subexp: %s", expression + 2) ; 138 nodes1 = xmlExpCtxtNbNodes(ctxt); 139 sub = xmlExpParse(ctxt, expression + 2); 140 if (sub == NULL) { 141 printf(" parsing Failed\n"); 142 break; 143 } else { 144 int ret; 145 146 nodes2 = xmlExpCtxtNbNodes(ctxt); 147 ret = xmlExpSubsume(ctxt, expr, sub); 148 149 if ((expect == 1) && (ret == 1)) { 150 printf(" => accept, Ok\n"); 151 } else if ((expect == 0) && (ret == 0)) { 152 printf(" => reject, Ok\n"); 153 } else if ((expect == 1) && (ret == 0)) { 154 printf(" => reject, Failed\n"); 155 } else if ((expect == 0) && (ret == 1)) { 156 printf(" => accept, Failed\n"); 157 } else { 158 printf(" => fail internally\n"); 159 } 160 if (xmlExpCtxtNbNodes(ctxt) > nodes2) { 161 printf(" Subsume leaked %d\n", 162 xmlExpCtxtNbNodes(ctxt) - nodes2); 163 nodes1 += xmlExpCtxtNbNodes(ctxt) - nodes2; 164 } 165 xmlExpFree(ctxt, sub); 166 if (xmlExpCtxtNbNodes(ctxt) > nodes1) { 167 printf(" Parse/free leaked %d\n", 168 xmlExpCtxtNbNodes(ctxt) - nodes1); 169 } 170 } 171 172 } 173 } 174 } 175 if (expr != NULL) { 176 xmlExpFree(ctxt, expr); 177 if (xmlExpCtxtNbNodes(ctxt) != 0) 178 printf(" Parse/free of Expression leaked %d\n", 179 xmlExpCtxtNbNodes(ctxt)); 180 } 181 fclose(input); 182 } 183 184 static void 185 testReduce(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr, const char *tst) { 186 xmlBufferPtr xmlExpBuf; 187 xmlExpNodePtr sub, deriv; 188 xmlExpBuf = xmlBufferCreate(); 189 190 sub = xmlExpParse(ctxt, tst); 191 if (sub == NULL) { 192 printf("Subset %s failed to parse\n", tst); 193 return; 194 } 195 xmlExpDump(xmlExpBuf, sub); 196 printf("Subset parsed as: %s\n", 197 (const char *) xmlBufferContent(xmlExpBuf)); 198 deriv = xmlExpExpDerive(ctxt, expr, sub); 199 if (deriv == NULL) { 200 printf("Derivation led to an internal error, report this !\n"); 201 return; 202 } else { 203 xmlBufferEmpty(xmlExpBuf); 204 xmlExpDump(xmlExpBuf, deriv); 205 if (xmlExpIsNillable(deriv)) 206 printf("Resulting nillable derivation: %s\n", 207 (const char *) xmlBufferContent(xmlExpBuf)); 208 else 209 printf("Resulting derivation: %s\n", 210 (const char *) xmlBufferContent(xmlExpBuf)); 211 xmlExpFree(ctxt, deriv); 212 } 213 xmlExpFree(ctxt, sub); 214 } 215 216 static void 217 exprDebug(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr) { 218 xmlBufferPtr xmlExpBuf; 219 xmlExpNodePtr deriv; 220 const char *list[40]; 221 int ret; 222 223 xmlExpBuf = xmlBufferCreate(); 224 225 if (expr == NULL) { 226 printf("Failed to parse\n"); 227 return; 228 } 229 xmlExpDump(xmlExpBuf, expr); 230 printf("Parsed as: %s\n", (const char *) xmlBufferContent(xmlExpBuf)); 231 printf("Max token input = %d\n", xmlExpMaxToken(expr)); 232 if (xmlExpIsNillable(expr) == 1) 233 printf("Is nillable\n"); 234 ret = xmlExpGetLanguage(ctxt, expr, (const xmlChar **) &list[0], 40); 235 if (ret < 0) 236 printf("Failed to get list: %d\n", ret); 237 else { 238 int i; 239 240 printf("Language has %d strings, testing string derivations\n", ret); 241 for (i = 0;i < ret;i++) { 242 deriv = xmlExpStringDerive(ctxt, expr, BAD_CAST list[i], -1); 243 if (deriv == NULL) { 244 printf(" %s -> derivation failed\n", list[i]); 245 } else { 246 xmlBufferEmpty(xmlExpBuf); 247 xmlExpDump(xmlExpBuf, deriv); 248 printf(" %s -> %s\n", list[i], 249 (const char *) xmlBufferContent(xmlExpBuf)); 250 } 251 xmlExpFree(ctxt, deriv); 252 } 253 } 254 xmlBufferFree(xmlExpBuf); 255 } 256 #endif 257 258 static void usage(const char *name) { 259 fprintf(stderr, "Usage: %s [flags]\n", name); 260 fprintf(stderr, "Testing tool for libxml2 string and pattern regexps\n"); 261 fprintf(stderr, " --debug: switch on debugging\n"); 262 fprintf(stderr, " --repeat: loop on the operation\n"); 263 #ifdef LIBXML_EXPR_ENABLED 264 fprintf(stderr, " --expr: test xmlExp and not xmlRegexp\n"); 265 #endif 266 fprintf(stderr, " --input filename: use the given filename for regexp\n"); 267 fprintf(stderr, " --input filename: use the given filename for exp\n"); 268 } 269 270 int main(int argc, char **argv) { 271 xmlRegexpPtr comp = NULL; 272 #ifdef LIBXML_EXPR_ENABLED 273 xmlExpNodePtr expr = NULL; 274 int use_exp = 0; 275 xmlExpCtxtPtr ctxt = NULL; 276 #endif 277 const char *pattern = NULL; 278 char *filename = NULL; 279 int i; 280 281 xmlInitMemory(); 282 283 if (argc <= 1) { 284 usage(argv[0]); 285 return(1); 286 } 287 for (i = 1; i < argc ; i++) { 288 if (!strcmp(argv[i], "-")) 289 break; 290 291 if (argv[i][0] != '-') 292 continue; 293 if (!strcmp(argv[i], "--")) 294 break; 295 296 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) { 297 debug++; 298 } else if ((!strcmp(argv[i], "-repeat")) || 299 (!strcmp(argv[i], "--repeat"))) { 300 repeat++; 301 #ifdef LIBXML_EXPR_ENABLED 302 } else if ((!strcmp(argv[i], "-expr")) || 303 (!strcmp(argv[i], "--expr"))) { 304 use_exp++; 305 #endif 306 } else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "-f")) || 307 (!strcmp(argv[i], "--input"))) 308 filename = argv[++i]; 309 else { 310 fprintf(stderr, "Unknown option %s\n", argv[i]); 311 usage(argv[0]); 312 } 313 } 314 315 #ifdef LIBXML_EXPR_ENABLED 316 if (use_exp) 317 ctxt = xmlExpNewCtxt(0, NULL); 318 #endif 319 320 if (filename != NULL) { 321 #ifdef LIBXML_EXPR_ENABLED 322 if (use_exp) 323 runFileTest(ctxt, filename); 324 else 325 #endif 326 testRegexpFile(filename); 327 } else { 328 int data = 0; 329 #ifdef LIBXML_EXPR_ENABLED 330 331 if (use_exp) { 332 for (i = 1; i < argc ; i++) { 333 if (strcmp(argv[i], "--") == 0) 334 data = 1; 335 else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) || 336 (data == 1)) { 337 if (pattern == NULL) { 338 pattern = argv[i]; 339 printf("Testing expr %s:\n", pattern); 340 expr = xmlExpParse(ctxt, pattern); 341 if (expr == NULL) { 342 printf(" failed to compile\n"); 343 break; 344 } 345 if (debug) { 346 exprDebug(ctxt, expr); 347 } 348 } else { 349 testReduce(ctxt, expr, argv[i]); 350 } 351 } 352 } 353 if (expr != NULL) { 354 xmlExpFree(ctxt, expr); 355 expr = NULL; 356 } 357 } else 358 #endif 359 { 360 for (i = 1; i < argc ; i++) { 361 if (strcmp(argv[i], "--") == 0) 362 data = 1; 363 else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) || 364 (data == 1)) { 365 if (pattern == NULL) { 366 pattern = argv[i]; 367 printf("Testing %s:\n", pattern); 368 comp = xmlRegexpCompile((const xmlChar *) pattern); 369 if (comp == NULL) { 370 printf(" failed to compile\n"); 371 break; 372 } 373 if (debug) 374 xmlRegexpPrint(stdout, comp); 375 } else { 376 testRegexp(comp, argv[i]); 377 } 378 } 379 } 380 if (comp != NULL) 381 xmlRegFreeRegexp(comp); 382 } 383 } 384 #ifdef LIBXML_EXPR_ENABLED 385 if (ctxt != NULL) { 386 printf("Ops: %d nodes, %d cons\n", 387 xmlExpCtxtNbNodes(ctxt), xmlExpCtxtNbCons(ctxt)); 388 xmlExpFreeCtxt(ctxt); 389 } 390 #endif 391 xmlCleanupParser(); 392 xmlMemoryDump(); 393 return(0); 394 } 395 396 #else 397 #include <stdio.h> 398 int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { 399 printf("%s : Regexp support not compiled in\n", argv[0]); 400 return(0); 401 } 402 #endif /* LIBXML_REGEXP_ENABLED */ 403