Home | History | Annotate | Download | only in libxml2
      1 /*
      2  * testHTML.c : a small tester program for HTML input.
      3  *
      4  * See Copyright for the status of this software.
      5  *
      6  * daniel (at) veillard.com
      7  */
      8 
      9 #include "libxml.h"
     10 
     11 #ifdef LIBXML_HTML_ENABLED
     12 
     13 #include <string.h>
     14 #include <stdarg.h>
     15 
     16 
     17 #ifdef HAVE_SYS_TYPES_H
     18 #include <sys/types.h>
     19 #endif
     20 #ifdef HAVE_SYS_STAT_H
     21 #include <sys/stat.h>
     22 #endif
     23 #ifdef HAVE_FCNTL_H
     24 #include <fcntl.h>
     25 #endif
     26 #ifdef HAVE_UNISTD_H
     27 #include <unistd.h>
     28 #endif
     29 #ifdef HAVE_STDLIB_H
     30 #include <stdlib.h>
     31 #endif
     32 
     33 #include <libxml/xmlmemory.h>
     34 #include <libxml/HTMLparser.h>
     35 #include <libxml/HTMLtree.h>
     36 #include <libxml/debugXML.h>
     37 #include <libxml/xmlerror.h>
     38 #include <libxml/globals.h>
     39 
/*
 * Command line switches: main() bumps or assigns them while scanning argv,
 * the parse routines below only read them.
 */
#ifdef LIBXML_DEBUG_ENABLED
/* --debug: dump the tree with xmlDebugDumpDocument() instead of serializing */
static int debug = 0;
#endif
/* --copy: run the parsed document through xmlCopyDoc() before output */
static int copy = 0;
/* --sax: process files with parseSAXFile() instead of parseAndPrintFile() */
static int sax = 0;
/* --repeat: each file is processed 100 * repeat times */
static int repeat = 0;
/* --noout: skip the debug-handler pass / the document dump */
static int noout = 0;
#ifdef LIBXML_PUSH_ENABLED
/* --push: read the file in chunks and feed it to the push parser */
static int push = 0;
#endif /* LIBXML_PUSH_ENABLED */
/* --encode: encoding name given to htmlSaveFileEnc(), NULL when not set */
static char *encoding = NULL;
/* parser options handed to htmlReadFile(); no switch in this file changes it */
static int options = 0;
     52 
/*
 * SAX handler with every callback left unset: parsing with it exercises
 * the parser without producing any callback output.
 * The slot order is the same as in debugSAXHandlerStruct.
 */
static xmlSAXHandler emptySAXHandlerStruct = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    NULL, /* startElement */
    NULL, /* endElement */
    NULL, /* reference */
    NULL, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* warning */
    NULL, /* error */
    NULL, /* fatalError */
    NULL, /* getParameterEntity */
    NULL, /* cdataBlock */
    NULL, /* externalSubset */
    1,    /* initialized */
    NULL, /* private */
    NULL, /* startElementNsSAX2Func */
    NULL, /* endElementNsSAX2Func */
    NULL  /* xmlStructuredErrorFunc */
};

/* handler used for the silent parsing pass in parseSAXFile() */
static xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
/* tracing handler, defined after the debug callbacks further down */
extern xmlSAXHandlerPtr debugSAXHandler;
     90 
     91 /************************************************************************
     92  *									*
     93  *				Debug Handlers				*
     94  *									*
     95  ************************************************************************/
     96 
     97 /**
     98  * isStandaloneDebug:
     99  * @ctxt:  An XML parser context
    100  *
    101  * Is this document tagged standalone ?
    102  *
    103  * Returns 1 if true
    104  */
    105 static int
    106 isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
    107 {
    108     fprintf(stdout, "SAX.isStandalone()\n");
    109     return(0);
    110 }
    111 
    112 /**
    113  * hasInternalSubsetDebug:
    114  * @ctxt:  An XML parser context
    115  *
    116  * Does this document has an internal subset
    117  *
    118  * Returns 1 if true
    119  */
    120 static int
    121 hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
    122 {
    123     fprintf(stdout, "SAX.hasInternalSubset()\n");
    124     return(0);
    125 }
    126 
    127 /**
    128  * hasExternalSubsetDebug:
    129  * @ctxt:  An XML parser context
    130  *
    131  * Does this document has an external subset
    132  *
    133  * Returns 1 if true
    134  */
    135 static int
    136 hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
    137 {
    138     fprintf(stdout, "SAX.hasExternalSubset()\n");
    139     return(0);
    140 }
    141 
    142 /**
    143  * hasInternalSubsetDebug:
    144  * @ctxt:  An XML parser context
    145  *
    146  * Does this document has an internal subset
    147  */
    148 static void
    149 internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
    150 	       const xmlChar *ExternalID, const xmlChar *SystemID)
    151 {
    152     fprintf(stdout, "SAX.internalSubset(%s,", name);
    153     if (ExternalID == NULL)
    154 	fprintf(stdout, " ,");
    155     else
    156 	fprintf(stdout, " %s,", ExternalID);
    157     if (SystemID == NULL)
    158 	fprintf(stdout, " )\n");
    159     else
    160 	fprintf(stdout, " %s)\n", SystemID);
    161 }
    162 
    163 /**
    164  * resolveEntityDebug:
    165  * @ctxt:  An XML parser context
    166  * @publicId: The public ID of the entity
    167  * @systemId: The system ID of the entity
    168  *
    169  * Special entity resolver, better left to the parser, it has
    170  * more context than the application layer.
    171  * The default behaviour is to NOT resolve the entities, in that case
    172  * the ENTITY_REF nodes are built in the structure (and the parameter
    173  * values).
    174  *
    175  * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
    176  */
    177 static xmlParserInputPtr
    178 resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
    179 {
    180     /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
    181 
    182 
    183     fprintf(stdout, "SAX.resolveEntity(");
    184     if (publicId != NULL)
    185 	fprintf(stdout, "%s", (char *)publicId);
    186     else
    187 	fprintf(stdout, " ");
    188     if (systemId != NULL)
    189 	fprintf(stdout, ", %s)\n", (char *)systemId);
    190     else
    191 	fprintf(stdout, ", )\n");
    192 /*********
    193     if (systemId != NULL) {
    194         return(xmlNewInputFromFile(ctxt, (char *) systemId));
    195     }
    196  *********/
    197     return(NULL);
    198 }
    199 
    200 /**
    201  * getEntityDebug:
    202  * @ctxt:  An XML parser context
    203  * @name: The entity name
    204  *
    205  * Get an entity by name
    206  *
    207  * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
    208  */
    209 static xmlEntityPtr
    210 getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
    211 {
    212     fprintf(stdout, "SAX.getEntity(%s)\n", name);
    213     return(NULL);
    214 }
    215 
    216 /**
    217  * getParameterEntityDebug:
    218  * @ctxt:  An XML parser context
    219  * @name: The entity name
    220  *
    221  * Get a parameter entity by name
    222  *
    223  * Returns the xmlParserInputPtr
    224  */
    225 static xmlEntityPtr
    226 getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
    227 {
    228     fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
    229     return(NULL);
    230 }
    231 
    232 
    233 /**
    234  * entityDeclDebug:
    235  * @ctxt:  An XML parser context
    236  * @name:  the entity name
    237  * @type:  the entity type
    238  * @publicId: The public ID of the entity
    239  * @systemId: The system ID of the entity
    240  * @content: the entity value (without processing).
    241  *
    242  * An entity definition has been parsed
    243  */
    244 static void
    245 entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
    246           const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
    247 {
    248     fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
    249             name, type, publicId, systemId, content);
    250 }
    251 
    252 /**
    253  * attributeDeclDebug:
    254  * @ctxt:  An XML parser context
    255  * @name:  the attribute name
    256  * @type:  the attribute type
    257  *
    258  * An attribute definition has been parsed
    259  */
    260 static void
    261 attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
    262               int type, int def, const xmlChar *defaultValue,
    263 	      xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
    264 {
    265     fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
    266             elem, name, type, def, defaultValue);
    267 }
    268 
    269 /**
    270  * elementDeclDebug:
    271  * @ctxt:  An XML parser context
    272  * @name:  the element name
    273  * @type:  the element type
    274  * @content: the element value (without processing).
    275  *
    276  * An element definition has been parsed
    277  */
    278 static void
    279 elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
    280 	    xmlElementContentPtr content ATTRIBUTE_UNUSED)
    281 {
    282     fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
    283             name, type);
    284 }
    285 
    286 /**
    287  * notationDeclDebug:
    288  * @ctxt:  An XML parser context
    289  * @name: The name of the notation
    290  * @publicId: The public ID of the entity
    291  * @systemId: The system ID of the entity
    292  *
    293  * What to do when a notation declaration has been parsed.
    294  */
    295 static void
    296 notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
    297 	     const xmlChar *publicId, const xmlChar *systemId)
    298 {
    299     fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
    300             (char *) name, (char *) publicId, (char *) systemId);
    301 }
    302 
    303 /**
    304  * unparsedEntityDeclDebug:
    305  * @ctxt:  An XML parser context
    306  * @name: The name of the entity
    307  * @publicId: The public ID of the entity
    308  * @systemId: The system ID of the entity
    309  * @notationName: the name of the notation
    310  *
    311  * What to do when an unparsed entity declaration is parsed
    312  */
    313 static void
    314 unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
    315 		   const xmlChar *publicId, const xmlChar *systemId,
    316 		   const xmlChar *notationName)
    317 {
    318     fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
    319             (char *) name, (char *) publicId, (char *) systemId,
    320 	    (char *) notationName);
    321 }
    322 
    323 /**
    324  * setDocumentLocatorDebug:
    325  * @ctxt:  An XML parser context
    326  * @loc: A SAX Locator
    327  *
    328  * Receive the document locator at startup, actually xmlDefaultSAXLocator
    329  * Everything is available on the context, so this is useless in our case.
    330  */
    331 static void
    332 setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
    333 {
    334     fprintf(stdout, "SAX.setDocumentLocator()\n");
    335 }
    336 
    337 /**
    338  * startDocumentDebug:
    339  * @ctxt:  An XML parser context
    340  *
    341  * called when the document start being processed.
    342  */
    343 static void
    344 startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
    345 {
    346     fprintf(stdout, "SAX.startDocument()\n");
    347 }
    348 
    349 /**
    350  * endDocumentDebug:
    351  * @ctxt:  An XML parser context
    352  *
    353  * called when the document end has been detected.
    354  */
    355 static void
    356 endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
    357 {
    358     fprintf(stdout, "SAX.endDocument()\n");
    359 }
    360 
    361 /**
    362  * startElementDebug:
    363  * @ctxt:  An XML parser context
    364  * @name:  The element name
    365  *
    366  * called when an opening tag has been processed.
    367  */
    368 static void
    369 startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
    370 {
    371     int i;
    372 
    373     fprintf(stdout, "SAX.startElement(%s", (char *) name);
    374     if (atts != NULL) {
    375         for (i = 0;(atts[i] != NULL);i++) {
    376 	    fprintf(stdout, ", %s", atts[i++]);
    377 	    if (atts[i] != NULL) {
    378 		unsigned char output[40];
    379 		const unsigned char *att = atts[i];
    380 		int outlen, attlen;
    381 	        fprintf(stdout, "='");
    382 		while ((attlen = strlen((char*)att)) > 0) {
    383 		    outlen = sizeof output - 1;
    384 		    htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
    385 		    output[outlen] = 0;
    386 		    fprintf(stdout, "%s", (char *) output);
    387 		    att += attlen;
    388 		}
    389 		fprintf(stdout, "'");
    390 	    }
    391 	}
    392     }
    393     fprintf(stdout, ")\n");
    394 }
    395 
    396 /**
    397  * endElementDebug:
    398  * @ctxt:  An XML parser context
    399  * @name:  The element name
    400  *
    401  * called when the end of an element has been detected.
    402  */
    403 static void
    404 endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
    405 {
    406     fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
    407 }
    408 
    409 /**
    410  * charactersDebug:
    411  * @ctxt:  An XML parser context
    412  * @ch:  a xmlChar string
    413  * @len: the number of xmlChar
    414  *
    415  * receiving some chars from the parser.
    416  * Question: how much at a time ???
    417  */
    418 static void
    419 charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
    420 {
    421     unsigned char output[40];
    422     int inlen = len, outlen = 30;
    423 
    424     htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
    425     output[outlen] = 0;
    426 
    427     fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
    428 }
    429 
    430 /**
    431  * cdataDebug:
    432  * @ctxt:  An XML parser context
    433  * @ch:  a xmlChar string
    434  * @len: the number of xmlChar
    435  *
    436  * receiving some cdata chars from the parser.
    437  * Question: how much at a time ???
    438  */
    439 static void
    440 cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
    441 {
    442     unsigned char output[40];
    443     int inlen = len, outlen = 30;
    444 
    445     htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
    446     output[outlen] = 0;
    447 
    448     fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
    449 }
    450 
    451 /**
    452  * referenceDebug:
    453  * @ctxt:  An XML parser context
    454  * @name:  The entity name
    455  *
    456  * called when an entity reference is detected.
    457  */
    458 static void
    459 referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
    460 {
    461     fprintf(stdout, "SAX.reference(%s)\n", name);
    462 }
    463 
    464 /**
    465  * ignorableWhitespaceDebug:
    466  * @ctxt:  An XML parser context
    467  * @ch:  a xmlChar string
    468  * @start: the first char in the string
    469  * @len: the number of xmlChar
    470  *
    471  * receiving some ignorable whitespaces from the parser.
    472  * Question: how much at a time ???
    473  */
    474 static void
    475 ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
    476 {
    477     char output[40];
    478     int i;
    479 
    480     for (i = 0;(i<len) && (i < 30);i++)
    481 	output[i] = ch[i];
    482     output[i] = 0;
    483 
    484     fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
    485 }
    486 
    487 /**
    488  * processingInstructionDebug:
    489  * @ctxt:  An XML parser context
    490  * @target:  the target name
    491  * @data: the PI data's
    492  * @len: the number of xmlChar
    493  *
    494  * A processing instruction has been parsed.
    495  */
    496 static void
    497 processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
    498                       const xmlChar *data)
    499 {
    500     fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
    501             (char *) target, (char *) data);
    502 }
    503 
    504 /**
    505  * commentDebug:
    506  * @ctxt:  An XML parser context
    507  * @value:  the comment content
    508  *
    509  * A comment has been parsed.
    510  */
    511 static void
    512 commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
    513 {
    514     fprintf(stdout, "SAX.comment(%s)\n", value);
    515 }
    516 
    517 /**
    518  * warningDebug:
    519  * @ctxt:  An XML parser context
    520  * @msg:  the message to display/transmit
    521  * @...:  extra parameters for the message display
    522  *
    523  * Display and format a warning messages, gives file, line, position and
    524  * extra parameters.
    525  */
    526 static void XMLCDECL
    527 warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
    528 {
    529     va_list args;
    530 
    531     va_start(args, msg);
    532     fprintf(stdout, "SAX.warning: ");
    533     vfprintf(stdout, msg, args);
    534     va_end(args);
    535 }
    536 
    537 /**
    538  * errorDebug:
    539  * @ctxt:  An XML parser context
    540  * @msg:  the message to display/transmit
    541  * @...:  extra parameters for the message display
    542  *
    543  * Display and format a error messages, gives file, line, position and
    544  * extra parameters.
    545  */
    546 static void XMLCDECL
    547 errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
    548 {
    549     va_list args;
    550 
    551     va_start(args, msg);
    552     fprintf(stdout, "SAX.error: ");
    553     vfprintf(stdout, msg, args);
    554     va_end(args);
    555 }
    556 
    557 /**
    558  * fatalErrorDebug:
    559  * @ctxt:  An XML parser context
    560  * @msg:  the message to display/transmit
    561  * @...:  extra parameters for the message display
    562  *
    563  * Display and format a fatalError messages, gives file, line, position and
    564  * extra parameters.
    565  */
    566 static void XMLCDECL
    567 fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
    568 {
    569     va_list args;
    570 
    571     va_start(args, msg);
    572     fprintf(stdout, "SAX.fatalError: ");
    573     vfprintf(stdout, msg, args);
    574     va_end(args);
    575 }
    576 
/*
 * SAX handler wiring every SAX1 slot to the tracing callbacks above.
 * The slot order is the same as in emptySAXHandlerStruct.
 */
static xmlSAXHandler debugSAXHandlerStruct = {
    internalSubsetDebug,
    isStandaloneDebug,
    hasInternalSubsetDebug,
    hasExternalSubsetDebug,
    resolveEntityDebug,
    getEntityDebug,
    entityDeclDebug,
    notationDeclDebug,
    attributeDeclDebug,
    elementDeclDebug,
    unparsedEntityDeclDebug,
    setDocumentLocatorDebug,
    startDocumentDebug,
    endDocumentDebug,
    startElementDebug,
    endElementDebug,
    referenceDebug,
    charactersDebug,
    ignorableWhitespaceDebug,
    processingInstructionDebug,
    commentDebug,
    warningDebug,
    errorDebug,
    fatalErrorDebug,
    getParameterEntityDebug,
    cdataDebug,
    NULL, /* externalSubset */
    1,    /* initialized */
    NULL, /* private */
    NULL, /* startElementNsSAX2Func */
    NULL, /* endElementNsSAX2Func */
    NULL  /* xmlStructuredErrorFunc */
};

/* handler used for the tracing pass in parseSAXFile() */
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
    613 /************************************************************************
    614  *									*
    615  *				Debug					*
    616  *									*
    617  ************************************************************************/
    618 
    619 static void
    620 parseSAXFile(char *filename) {
    621     htmlDocPtr doc = NULL;
    622 
    623     /*
    624      * Empty callbacks for checking
    625      */
    626 #ifdef LIBXML_PUSH_ENABLED
    627     if (push) {
    628 	FILE *f;
    629 
    630 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
    631 	f = fopen(filename, "rb");
    632 #else
    633 	f = fopen(filename, "r");
    634 #endif
    635 	if (f != NULL) {
    636 	    int res, size = 3;
    637 	    char chars[4096];
    638 	    htmlParserCtxtPtr ctxt;
    639 
    640 	    /* if (repeat) */
    641 		size = 4096;
    642 	    res = fread(chars, 1, 4, f);
    643 	    if (res > 0) {
    644 		ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
    645 			    chars, res, filename, XML_CHAR_ENCODING_NONE);
    646 		while ((res = fread(chars, 1, size, f)) > 0) {
    647 		    htmlParseChunk(ctxt, chars, res, 0);
    648 		}
    649 		htmlParseChunk(ctxt, chars, 0, 1);
    650 		doc = ctxt->myDoc;
    651 		htmlFreeParserCtxt(ctxt);
    652 	    }
    653 	    if (doc != NULL) {
    654 		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
    655 		xmlFreeDoc(doc);
    656 	    }
    657 	    fclose(f);
    658 	}
    659 	if (!noout) {
    660 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
    661 		f = fopen(filename, "rb");
    662 #else
    663 		f = fopen(filename, "r");
    664 #endif
    665 	    if (f != NULL) {
    666 		int res, size = 3;
    667 		char chars[4096];
    668 		htmlParserCtxtPtr ctxt;
    669 
    670 		/* if (repeat) */
    671 		    size = 4096;
    672 		res = fread(chars, 1, 4, f);
    673 		if (res > 0) {
    674 		    ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
    675 				chars, res, filename, XML_CHAR_ENCODING_NONE);
    676 		    while ((res = fread(chars, 1, size, f)) > 0) {
    677 			htmlParseChunk(ctxt, chars, res, 0);
    678 		    }
    679 		    htmlParseChunk(ctxt, chars, 0, 1);
    680 		    doc = ctxt->myDoc;
    681 		    htmlFreeParserCtxt(ctxt);
    682 		}
    683 		if (doc != NULL) {
    684 		    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
    685 		    xmlFreeDoc(doc);
    686 		}
    687 		fclose(f);
    688 	    }
    689 	}
    690     } else {
    691 #endif /* LIBXML_PUSH_ENABLED */
    692 	doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
    693 	if (doc != NULL) {
    694 	    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
    695 	    xmlFreeDoc(doc);
    696 	}
    697 
    698 	if (!noout) {
    699 	    /*
    700 	     * Debug callback
    701 	     */
    702 	    doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
    703 	    if (doc != NULL) {
    704 		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
    705 		xmlFreeDoc(doc);
    706 	    }
    707 	}
    708 #ifdef LIBXML_PUSH_ENABLED
    709     }
    710 #endif /* LIBXML_PUSH_ENABLED */
    711 }
    712 
    713 static void
    714 parseAndPrintFile(char *filename) {
    715     htmlDocPtr doc = NULL;
    716 
    717     /*
    718      * build an HTML tree from a string;
    719      */
    720 #ifdef LIBXML_PUSH_ENABLED
    721     if (push) {
    722 	FILE *f;
    723 
    724 #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
    725 	f = fopen(filename, "rb");
    726 #else
    727 	f = fopen(filename, "r");
    728 #endif
    729 	if (f != NULL) {
    730 	    int res, size = 3;
    731 	    char chars[4096];
    732 	    htmlParserCtxtPtr ctxt;
    733 
    734 	    /* if (repeat) */
    735 		size = 4096;
    736 	    res = fread(chars, 1, 4, f);
    737 	    if (res > 0) {
    738 		ctxt = htmlCreatePushParserCtxt(NULL, NULL,
    739 			    chars, res, filename, XML_CHAR_ENCODING_NONE);
    740 		while ((res = fread(chars, 1, size, f)) > 0) {
    741 		    htmlParseChunk(ctxt, chars, res, 0);
    742 		}
    743 		htmlParseChunk(ctxt, chars, 0, 1);
    744 		doc = ctxt->myDoc;
    745 		htmlFreeParserCtxt(ctxt);
    746 	    }
    747 	    fclose(f);
    748 	}
    749     } else {
    750 	doc = htmlReadFile(filename, NULL, options);
    751     }
    752 #else
    753 	doc = htmlReadFile(filename,NULL,options);
    754 #endif
    755     if (doc == NULL) {
    756         xmlGenericError(xmlGenericErrorContext,
    757 		"Could not parse %s\n", filename);
    758     }
    759 
    760 #ifdef LIBXML_TREE_ENABLED
    761     /*
    762      * test intermediate copy if needed.
    763      */
    764     if (copy) {
    765         htmlDocPtr tmp;
    766 
    767         tmp = doc;
    768 	doc = xmlCopyDoc(doc, 1);
    769 	xmlFreeDoc(tmp);
    770     }
    771 #endif
    772 
    773 #ifdef LIBXML_OUTPUT_ENABLED
    774     /*
    775      * print it.
    776      */
    777     if (!noout) {
    778 #ifdef LIBXML_DEBUG_ENABLED
    779 	if (!debug) {
    780 	    if (encoding)
    781 		htmlSaveFileEnc("-", doc, encoding);
    782 	    else
    783 		htmlDocDump(stdout, doc);
    784 	} else
    785 	    xmlDebugDumpDocument(stdout, doc);
    786 #else
    787 	if (encoding)
    788 	    htmlSaveFileEnc("-", doc, encoding);
    789 	else
    790 	    htmlDocDump(stdout, doc);
    791 #endif
    792     }
    793 #endif /* LIBXML_OUTPUT_ENABLED */
    794 
    795     /*
    796      * free it.
    797      */
    798     xmlFreeDoc(doc);
    799 }
    800 
    801 int main(int argc, char **argv) {
    802     int i, count;
    803     int files = 0;
    804 
    805     for (i = 1; i < argc ; i++) {
    806 #ifdef LIBXML_DEBUG_ENABLED
    807 	if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
    808 	    debug++;
    809 	else
    810 #endif
    811 	    if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
    812 	    copy++;
    813 #ifdef LIBXML_PUSH_ENABLED
    814 	else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
    815 	    push++;
    816 #endif /* LIBXML_PUSH_ENABLED */
    817 	else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
    818 	    sax++;
    819 	else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
    820 	    noout++;
    821 	else if ((!strcmp(argv[i], "-repeat")) ||
    822 	         (!strcmp(argv[i], "--repeat")))
    823 	    repeat++;
    824 	else if ((!strcmp(argv[i], "-encode")) ||
    825 	         (!strcmp(argv[i], "--encode"))) {
    826 	    i++;
    827 	    encoding = argv[i];
    828         }
    829     }
    830     for (i = 1; i < argc ; i++) {
    831 	if ((!strcmp(argv[i], "-encode")) ||
    832 	         (!strcmp(argv[i], "--encode"))) {
    833 	    i++;
    834 	    continue;
    835         }
    836 	if (argv[i][0] != '-') {
    837 	    if (repeat) {
    838 		for (count = 0;count < 100 * repeat;count++) {
    839 		    if (sax)
    840 			parseSAXFile(argv[i]);
    841 		    else
    842 			parseAndPrintFile(argv[i]);
    843 		}
    844 	    } else {
    845 		if (sax)
    846 		    parseSAXFile(argv[i]);
    847 		else
    848 		    parseAndPrintFile(argv[i]);
    849 	    }
    850 	    files ++;
    851 	}
    852     }
    853     if (files == 0) {
    854 	printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
    855 	       argv[0]);
    856 	printf("\tParse the HTML files and output the result of the parsing\n");
    857 #ifdef LIBXML_DEBUG_ENABLED
    858 	printf("\t--debug : dump a debug tree of the in-memory document\n");
    859 #endif
    860 	printf("\t--copy : used to test the internal copy implementation\n");
    861 	printf("\t--sax : debug the sequence of SAX callbacks\n");
    862 	printf("\t--repeat : parse the file 100 times, for timing\n");
    863 	printf("\t--noout : do not print the result\n");
    864 #ifdef LIBXML_PUSH_ENABLED
    865 	printf("\t--push : use the push mode parser\n");
    866 #endif /* LIBXML_PUSH_ENABLED */
    867 	printf("\t--encode encoding : output in the given encoding\n");
    868     }
    869     xmlCleanupParser();
    870     xmlMemoryDump();
    871 
    872     return(0);
    873 }
    874 #else /* !LIBXML_HTML_ENABLED */
    875 #include <stdio.h>
    876 int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
    877     printf("%s : HTML support not compiled in\n", argv[0]);
    878     return(0);
    879 }
    880 #endif
    881