Home | History | Annotate | Download | only in libxml2
      1 /*
      2  * HTMLtree.c : implementation of access function for an HTML tree.
      3  *
      4  * See Copyright for the status of this software.
      5  *
      6  * daniel (at) veillard.com
      7  */
      8 
      9 
     10 #define IN_LIBXML
     11 #include "libxml.h"
     12 #ifdef LIBXML_HTML_ENABLED
     13 
     14 #include <string.h> /* for memset() only ! */
     15 
     16 #ifdef HAVE_CTYPE_H
     17 #include <ctype.h>
     18 #endif
     19 #ifdef HAVE_STDLIB_H
     20 #include <stdlib.h>
     21 #endif
     22 
     23 #include <libxml/xmlmemory.h>
     24 #include <libxml/HTMLparser.h>
     25 #include <libxml/HTMLtree.h>
     26 #include <libxml/entities.h>
     27 #include <libxml/valid.h>
     28 #include <libxml/xmlerror.h>
     29 #include <libxml/parserInternals.h>
     30 #include <libxml/globals.h>
     31 #include <libxml/uri.h>
     32 
     33 /************************************************************************
     34  *									*
     35  *   		Getting/Setting encoding meta tags			*
     36  *									*
     37  ************************************************************************/
     38 
     39 /**
     40  * htmlGetMetaEncoding:
     41  * @doc:  the document
     42  *
     43  * Encoding definition lookup in the Meta tags
     44  *
     45  * Returns the current encoding as flagged in the HTML source
     46  */
     47 const xmlChar *
     48 htmlGetMetaEncoding(htmlDocPtr doc) {
     49     htmlNodePtr cur;
     50     const xmlChar *content;
     51     const xmlChar *encoding;
     52 
     53     if (doc == NULL)
     54 	return(NULL);
     55     cur = doc->children;
     56 
     57     /*
     58      * Search the html
     59      */
     60     while (cur != NULL) {
     61 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     62 	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
     63 		break;
     64 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
     65 		goto found_head;
     66 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
     67 		goto found_meta;
     68 	}
     69 	cur = cur->next;
     70     }
     71     if (cur == NULL)
     72 	return(NULL);
     73     cur = cur->children;
     74 
     75     /*
     76      * Search the head
     77      */
     78     while (cur != NULL) {
     79 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     80 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
     81 		break;
     82 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
     83 		goto found_meta;
     84 	}
     85 	cur = cur->next;
     86     }
     87     if (cur == NULL)
     88 	return(NULL);
     89 found_head:
     90     cur = cur->children;
     91 
     92     /*
     93      * Search the meta elements
     94      */
     95 found_meta:
     96     while (cur != NULL) {
     97 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     98 	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
     99 		xmlAttrPtr attr = cur->properties;
    100 		int http;
    101 		const xmlChar *value;
    102 
    103 		content = NULL;
    104 		http = 0;
    105 		while (attr != NULL) {
    106 		    if ((attr->children != NULL) &&
    107 		        (attr->children->type == XML_TEXT_NODE) &&
    108 		        (attr->children->next == NULL)) {
    109 			value = attr->children->content;
    110 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
    111 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
    112 			    http = 1;
    113 			else if ((value != NULL)
    114 			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
    115 			    content = value;
    116 			if ((http != 0) && (content != NULL))
    117 			    goto found_content;
    118 		    }
    119 		    attr = attr->next;
    120 		}
    121 	    }
    122 	}
    123 	cur = cur->next;
    124     }
    125     return(NULL);
    126 
    127 found_content:
    128     encoding = xmlStrstr(content, BAD_CAST"charset=");
    129     if (encoding == NULL)
    130 	encoding = xmlStrstr(content, BAD_CAST"Charset=");
    131     if (encoding == NULL)
    132 	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
    133     if (encoding != NULL) {
    134 	encoding += 8;
    135     } else {
    136 	encoding = xmlStrstr(content, BAD_CAST"charset =");
    137 	if (encoding == NULL)
    138 	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
    139 	if (encoding == NULL)
    140 	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
    141 	if (encoding != NULL)
    142 	    encoding += 9;
    143     }
    144     if (encoding != NULL) {
    145 	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
    146     }
    147     return(encoding);
    148 }
    149 
    150 /**
    151  * htmlSetMetaEncoding:
    152  * @doc:  the document
    153  * @encoding:  the encoding string
    154  *
    155  * Sets the current encoding in the Meta tags
    156  * NOTE: this will not change the document content encoding, just
    157  * the META flag associated.
    158  *
    159  * Returns 0 in case of success and -1 in case of error
    160  */
    161 int
    162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
    163     htmlNodePtr cur, meta = NULL, head = NULL;
    164     const xmlChar *content = NULL;
    165     char newcontent[100];
    166 
    167 
    168     if (doc == NULL)
    169 	return(-1);
    170 
    171     /* html isn't a real encoding it's just libxml2 way to get entities */
    172     if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
    173         return(-1);
    174 
    175     if (encoding != NULL) {
    176 	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
    177                 (char *)encoding);
    178 	newcontent[sizeof(newcontent) - 1] = 0;
    179     }
    180 
    181     cur = doc->children;
    182 
    183     /*
    184      * Search the html
    185      */
    186     while (cur != NULL) {
    187 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    188 	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
    189 		break;
    190 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
    191 		goto found_head;
    192 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
    193 		goto found_meta;
    194 	}
    195 	cur = cur->next;
    196     }
    197     if (cur == NULL)
    198 	return(-1);
    199     cur = cur->children;
    200 
    201     /*
    202      * Search the head
    203      */
    204     while (cur != NULL) {
    205 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    206 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
    207 		break;
    208 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
    209                 head = cur->parent;
    210 		goto found_meta;
    211             }
    212 	}
    213 	cur = cur->next;
    214     }
    215     if (cur == NULL)
    216 	return(-1);
    217 found_head:
    218     head = cur;
    219     if (cur->children == NULL)
    220         goto create;
    221     cur = cur->children;
    222 
    223 found_meta:
    224     /*
    225      * Search and update all the remaining the meta elements carrying
    226      * encoding informations
    227      */
    228     while (cur != NULL) {
    229 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    230 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
    231 		xmlAttrPtr attr = cur->properties;
    232 		int http;
    233 		const xmlChar *value;
    234 
    235 		content = NULL;
    236 		http = 0;
    237 		while (attr != NULL) {
    238 		    if ((attr->children != NULL) &&
    239 		        (attr->children->type == XML_TEXT_NODE) &&
    240 		        (attr->children->next == NULL)) {
    241 			value = attr->children->content;
    242 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
    243 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
    244 			    http = 1;
    245 			else
    246                         {
    247                            if ((value != NULL) &&
    248                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
    249 			       content = value;
    250                         }
    251 		        if ((http != 0) && (content != NULL))
    252 			    break;
    253 		    }
    254 		    attr = attr->next;
    255 		}
    256 		if ((http != 0) && (content != NULL)) {
    257 		    meta = cur;
    258 		    break;
    259 		}
    260 
    261 	    }
    262 	}
    263 	cur = cur->next;
    264     }
    265 create:
    266     if (meta == NULL) {
    267         if ((encoding != NULL) && (head != NULL)) {
    268             /*
    269              * Create a new Meta element with the right attributes
    270              */
    271 
    272             meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
    273             if (head->children == NULL)
    274                 xmlAddChild(head, meta);
    275             else
    276                 xmlAddPrevSibling(head->children, meta);
    277             xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
    278             xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
    279         }
    280     } else {
    281         /* change the document only if there is a real encoding change */
    282         if (xmlStrcasestr(content, encoding) == NULL) {
    283             xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
    284         }
    285     }
    286 
    287 
    288     return(0);
    289 }
    290 
    291 /**
    292  * booleanHTMLAttrs:
    293  *
    294  * These are the HTML attributes which will be output
    295  * in minimized form, i.e. <option selected="selected"> will be
    296  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
    297  *
    298  */
    299 static const char* htmlBooleanAttrs[] = {
    300   "checked", "compact", "declare", "defer", "disabled", "ismap",
    301   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    302   "selected", NULL
    303 };
    304 
    305 
    306 /**
    307  * htmlIsBooleanAttr:
    308  * @name:  the name of the attribute to check
    309  *
    310  * Determine if a given attribute is a boolean attribute.
    311  *
    312  * returns: false if the attribute is not boolean, true otherwise.
    313  */
    314 int
    315 htmlIsBooleanAttr(const xmlChar *name)
    316 {
    317     int i = 0;
    318 
    319     while (htmlBooleanAttrs[i] != NULL) {
    320         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
    321             return 1;
    322         i++;
    323     }
    324     return 0;
    325 }
    326 
    327 #ifdef LIBXML_OUTPUT_ENABLED
    328 /*
    329  * private routine exported from xmlIO.c
    330  */
    331 xmlOutputBufferPtr
    332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
    333 /************************************************************************
    334  *									*
    335  * 			Output error handlers				*
    336  *									*
    337  ************************************************************************/
    338 /**
    339  * htmlSaveErrMemory:
    340  * @extra:  extra informations
    341  *
    342  * Handle an out of memory condition
    343  */
    344 static void
    345 htmlSaveErrMemory(const char *extra)
    346 {
    347     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
    348 }
    349 
    350 /**
    351  * htmlSaveErr:
    352  * @code:  the error number
    353  * @node:  the location of the error.
    354  * @extra:  extra informations
    355  *
    356  * Handle an out of memory condition
    357  */
    358 static void
    359 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
    360 {
    361     const char *msg = NULL;
    362 
    363     switch(code) {
    364         case XML_SAVE_NOT_UTF8:
    365 	    msg = "string is not in UTF-8\n";
    366 	    break;
    367 	case XML_SAVE_CHAR_INVALID:
    368 	    msg = "invalid character value\n";
    369 	    break;
    370 	case XML_SAVE_UNKNOWN_ENCODING:
    371 	    msg = "unknown encoding %s\n";
    372 	    break;
    373 	case XML_SAVE_NO_DOCTYPE:
    374 	    msg = "HTML has no DOCTYPE\n";
    375 	    break;
    376 	default:
    377 	    msg = "unexpected error number\n";
    378     }
    379     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
    380 }
    381 
    382 /************************************************************************
    383  *									*
    384  *   		Dumping HTML tree content to a simple buffer		*
    385  *									*
    386  ************************************************************************/
    387 
    388 static int
    389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
    390 	           int format);
    391 
    392 /**
    393  * htmlNodeDumpFormat:
    394  * @buf:  the HTML buffer output
    395  * @doc:  the document
    396  * @cur:  the current node
    397  * @format:  should formatting spaces been added
    398  *
    399  * Dump an HTML node, recursive behaviour,children are printed too.
    400  *
    401  * Returns the number of byte written or -1 in case of error
    402  */
    403 static int
    404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
    405 	           int format) {
    406     unsigned int use;
    407     int ret;
    408     xmlOutputBufferPtr outbuf;
    409 
    410     if (cur == NULL) {
    411 	return (-1);
    412     }
    413     if (buf == NULL) {
    414 	return (-1);
    415     }
    416     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
    417     if (outbuf == NULL) {
    418         htmlSaveErrMemory("allocating HTML output buffer");
    419 	return (-1);
    420     }
    421     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
    422     outbuf->buffer = buf;
    423     outbuf->encoder = NULL;
    424     outbuf->writecallback = NULL;
    425     outbuf->closecallback = NULL;
    426     outbuf->context = NULL;
    427     outbuf->written = 0;
    428 
    429     use = buf->use;
    430     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
    431     xmlFree(outbuf);
    432     ret = buf->use - use;
    433     return (ret);
    434 }
    435 
    436 /**
    437  * htmlNodeDump:
    438  * @buf:  the HTML buffer output
    439  * @doc:  the document
    440  * @cur:  the current node
    441  *
    442  * Dump an HTML node, recursive behaviour,children are printed too,
    443  * and formatting returns are added.
    444  *
    445  * Returns the number of byte written or -1 in case of error
    446  */
    447 int
    448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
    449     xmlInitParser();
    450 
    451     return(htmlNodeDumpFormat(buf, doc, cur, 1));
    452 }
    453 
    454 /**
    455  * htmlNodeDumpFileFormat:
    456  * @out:  the FILE pointer
    457  * @doc:  the document
    458  * @cur:  the current node
    459  * @encoding: the document encoding
    460  * @format:  should formatting spaces been added
    461  *
    462  * Dump an HTML node, recursive behaviour,children are printed too.
    463  *
    464  * TODO: if encoding == NULL try to save in the doc encoding
    465  *
    466  * returns: the number of byte written or -1 in case of failure.
    467  */
    468 int
    469 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
    470 	               xmlNodePtr cur, const char *encoding, int format) {
    471     xmlOutputBufferPtr buf;
    472     xmlCharEncodingHandlerPtr handler = NULL;
    473     int ret;
    474 
    475     xmlInitParser();
    476 
    477     if (encoding != NULL) {
    478 	xmlCharEncoding enc;
    479 
    480 	enc = xmlParseCharEncoding(encoding);
    481 	if (enc != XML_CHAR_ENCODING_UTF8) {
    482 	    handler = xmlFindCharEncodingHandler(encoding);
    483 	    if (handler == NULL)
    484 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
    485 	}
    486     }
    487 
    488     /*
    489      * Fallback to HTML or ASCII when the encoding is unspecified
    490      */
    491     if (handler == NULL)
    492 	handler = xmlFindCharEncodingHandler("HTML");
    493     if (handler == NULL)
    494 	handler = xmlFindCharEncodingHandler("ascii");
    495 
    496     /*
    497      * save the content to a temp buffer.
    498      */
    499     buf = xmlOutputBufferCreateFile(out, handler);
    500     if (buf == NULL) return(0);
    501 
    502     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
    503 
    504     ret = xmlOutputBufferClose(buf);
    505     return(ret);
    506 }
    507 
    508 /**
    509  * htmlNodeDumpFile:
    510  * @out:  the FILE pointer
    511  * @doc:  the document
    512  * @cur:  the current node
    513  *
    514  * Dump an HTML node, recursive behaviour,children are printed too,
    515  * and formatting returns are added.
    516  */
    517 void
    518 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    519     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
    520 }
    521 
    522 /**
    523  * htmlDocDumpMemoryFormat:
    524  * @cur:  the document
    525  * @mem:  OUT: the memory pointer
    526  * @size:  OUT: the memory length
    527  * @format:  should formatting spaces been added
    528  *
    529  * Dump an HTML document in memory and return the xmlChar * and it's size.
    530  * It's up to the caller to free the memory.
    531  */
    532 void
    533 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
    534     xmlOutputBufferPtr buf;
    535     xmlCharEncodingHandlerPtr handler = NULL;
    536     const char *encoding;
    537 
    538     xmlInitParser();
    539 
    540     if ((mem == NULL) || (size == NULL))
    541         return;
    542     if (cur == NULL) {
    543 	*mem = NULL;
    544 	*size = 0;
    545 	return;
    546     }
    547 
    548     encoding = (const char *) htmlGetMetaEncoding(cur);
    549 
    550     if (encoding != NULL) {
    551 	xmlCharEncoding enc;
    552 
    553 	enc = xmlParseCharEncoding(encoding);
    554 	if (enc != cur->charset) {
    555 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
    556 		/*
    557 		 * Not supported yet
    558 		 */
    559 		*mem = NULL;
    560 		*size = 0;
    561 		return;
    562 	    }
    563 
    564 	    handler = xmlFindCharEncodingHandler(encoding);
    565 	    if (handler == NULL)
    566                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
    567 
    568 	} else {
    569 	    handler = xmlFindCharEncodingHandler(encoding);
    570 	}
    571     }
    572 
    573     /*
    574      * Fallback to HTML or ASCII when the encoding is unspecified
    575      */
    576     if (handler == NULL)
    577 	handler = xmlFindCharEncodingHandler("HTML");
    578     if (handler == NULL)
    579 	handler = xmlFindCharEncodingHandler("ascii");
    580 
    581     buf = xmlAllocOutputBufferInternal(handler);
    582     if (buf == NULL) {
    583 	*mem = NULL;
    584 	*size = 0;
    585 	return;
    586     }
    587 
    588     htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
    589 
    590     xmlOutputBufferFlush(buf);
    591     if (buf->conv != NULL) {
    592 	*size = buf->conv->use;
    593 	*mem = xmlStrndup(buf->conv->content, *size);
    594     } else {
    595 	*size = buf->buffer->use;
    596 	*mem = xmlStrndup(buf->buffer->content, *size);
    597     }
    598     (void)xmlOutputBufferClose(buf);
    599 }
    600 
    601 /**
    602  * htmlDocDumpMemory:
    603  * @cur:  the document
    604  * @mem:  OUT: the memory pointer
    605  * @size:  OUT: the memory length
    606  *
    607  * Dump an HTML document in memory and return the xmlChar * and it's size.
    608  * It's up to the caller to free the memory.
    609  */
    610 void
    611 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
    612 	htmlDocDumpMemoryFormat(cur, mem, size, 1);
    613 }
    614 
    615 
    616 /************************************************************************
    617  *									*
    618  *   		Dumping HTML tree content to an I/O output buffer	*
    619  *									*
    620  ************************************************************************/
    621 
    622 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
    623 
    624 /**
    625  * htmlDtdDumpOutput:
    626  * @buf:  the HTML buffer output
    627  * @doc:  the document
    628  * @encoding:  the encoding string
    629  *
    630  * TODO: check whether encoding is needed
    631  *
    632  * Dump the HTML document DTD, if any.
    633  */
    634 static void
    635 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    636 	          const char *encoding ATTRIBUTE_UNUSED) {
    637     xmlDtdPtr cur = doc->intSubset;
    638 
    639     if (cur == NULL) {
    640 	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
    641 	return;
    642     }
    643     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
    644     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    645     if (cur->ExternalID != NULL) {
    646 	xmlOutputBufferWriteString(buf, " PUBLIC ");
    647 	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
    648 	if (cur->SystemID != NULL) {
    649 	    xmlOutputBufferWriteString(buf, " ");
    650 	    xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
    651 	}
    652     }  else if (cur->SystemID != NULL) {
    653 	xmlOutputBufferWriteString(buf, " SYSTEM ");
    654 	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
    655     }
    656     xmlOutputBufferWriteString(buf, ">\n");
    657 }
    658 
    659 /**
    660  * htmlAttrDumpOutput:
    661  * @buf:  the HTML buffer output
    662  * @doc:  the document
    663  * @cur:  the attribute pointer
    664  * @encoding:  the encoding string
    665  *
    666  * Dump an HTML attribute
    667  */
    668 static void
    669 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
    670 	           const char *encoding ATTRIBUTE_UNUSED) {
    671     xmlChar *value;
    672 
    673     /*
    674      * TODO: The html output method should not escape a & character
    675      *       occurring in an attribute value immediately followed by
    676      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
    677      */
    678 
    679     if (cur == NULL) {
    680 	return;
    681     }
    682     xmlOutputBufferWriteString(buf, " ");
    683     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    684         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    685 	xmlOutputBufferWriteString(buf, ":");
    686     }
    687     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    688     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
    689 	value = xmlNodeListGetString(doc, cur->children, 0);
    690 	if (value) {
    691 	    xmlOutputBufferWriteString(buf, "=");
    692 	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
    693 		(cur->parent->ns == NULL) &&
    694 		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
    695 	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
    696 		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
    697 		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
    698 		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
    699 		xmlChar *escaped;
    700 		xmlChar *tmp = value;
    701 
    702 		while (IS_BLANK_CH(*tmp)) tmp++;
    703 
    704 		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
    705 		if (escaped != NULL) {
    706 		    xmlBufferWriteQuotedString(buf->buffer, escaped);
    707 		    xmlFree(escaped);
    708 		} else {
    709 		    xmlBufferWriteQuotedString(buf->buffer, value);
    710 		}
    711 	    } else {
    712 		xmlBufferWriteQuotedString(buf->buffer, value);
    713 	    }
    714 	    xmlFree(value);
    715 	} else  {
    716 	    xmlOutputBufferWriteString(buf, "=\"\"");
    717 	}
    718     }
    719 }
    720 
    721 /**
    722  * htmlAttrListDumpOutput:
    723  * @buf:  the HTML buffer output
    724  * @doc:  the document
    725  * @cur:  the first attribute pointer
    726  * @encoding:  the encoding string
    727  *
    728  * Dump a list of HTML attributes
    729  */
    730 static void
    731 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
    732     if (cur == NULL) {
    733 	return;
    734     }
    735     while (cur != NULL) {
    736         htmlAttrDumpOutput(buf, doc, cur, encoding);
    737 	cur = cur->next;
    738     }
    739 }
    740 
    741 
    742 
    743 /**
    744  * htmlNodeListDumpOutput:
    745  * @buf:  the HTML buffer output
    746  * @doc:  the document
    747  * @cur:  the first node
    748  * @encoding:  the encoding string
    749  * @format:  should formatting spaces been added
    750  *
    751  * Dump an HTML node list, recursive behaviour,children are printed too.
    752  */
    753 static void
    754 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    755 	               xmlNodePtr cur, const char *encoding, int format) {
    756     if (cur == NULL) {
    757 	return;
    758     }
    759     while (cur != NULL) {
    760         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
    761 	cur = cur->next;
    762     }
    763 }
    764 
    765 /**
    766  * htmlNodeDumpFormatOutput:
    767  * @buf:  the HTML buffer output
    768  * @doc:  the document
    769  * @cur:  the current node
    770  * @encoding:  the encoding string
    771  * @format:  should formatting spaces been added
    772  *
    773  * Dump an HTML node, recursive behaviour,children are printed too.
    774  */
    775 void
    776 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    777 	                 xmlNodePtr cur, const char *encoding, int format) {
    778     const htmlElemDesc * info;
    779 
    780     xmlInitParser();
    781 
    782     if ((cur == NULL) || (buf == NULL)) {
    783 	return;
    784     }
    785     /*
    786      * Special cases.
    787      */
    788     if (cur->type == XML_DTD_NODE)
    789 	return;
    790     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
    791         (cur->type == XML_DOCUMENT_NODE)){
    792 	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
    793 	return;
    794     }
    795     if (cur->type == XML_ATTRIBUTE_NODE) {
    796         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
    797 	return;
    798     }
    799     if (cur->type == HTML_TEXT_NODE) {
    800 	if (cur->content != NULL) {
    801 	    if (((cur->name == (const xmlChar *)xmlStringText) ||
    802 		 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
    803 		((cur->parent == NULL) ||
    804 		 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
    805 		  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
    806 		xmlChar *buffer;
    807 
    808 		buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
    809 		if (buffer != NULL) {
    810 		    xmlOutputBufferWriteString(buf, (const char *)buffer);
    811 		    xmlFree(buffer);
    812 		}
    813 	    } else {
    814 		xmlOutputBufferWriteString(buf, (const char *)cur->content);
    815 	    }
    816 	}
    817 	return;
    818     }
    819     if (cur->type == HTML_COMMENT_NODE) {
    820 	if (cur->content != NULL) {
    821 	    xmlOutputBufferWriteString(buf, "<!--");
    822 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    823 	    xmlOutputBufferWriteString(buf, "-->");
    824 	}
    825 	return;
    826     }
    827     if (cur->type == HTML_PI_NODE) {
    828 	if (cur->name == NULL)
    829 	    return;
    830 	xmlOutputBufferWriteString(buf, "<?");
    831 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
    832 	if (cur->content != NULL) {
    833 	    xmlOutputBufferWriteString(buf, " ");
    834 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    835 	}
    836 	xmlOutputBufferWriteString(buf, ">");
    837 	return;
    838     }
    839     if (cur->type == HTML_ENTITY_REF_NODE) {
    840         xmlOutputBufferWriteString(buf, "&");
    841 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
    842         xmlOutputBufferWriteString(buf, ";");
    843 	return;
    844     }
    845     if (cur->type == HTML_PRESERVE_NODE) {
    846 	if (cur->content != NULL) {
    847 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    848 	}
    849 	return;
    850     }
    851 
    852     /*
    853      * Get specific HTML info for that node.
    854      */
    855     if (cur->ns == NULL)
    856 	info = htmlTagLookup(cur->name);
    857     else
    858 	info = NULL;
    859 
    860     xmlOutputBufferWriteString(buf, "<");
    861     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    862         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    863 	xmlOutputBufferWriteString(buf, ":");
    864     }
    865     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    866     if (cur->nsDef)
    867 	xmlNsListDumpOutput(buf, cur->nsDef);
    868     if (cur->properties != NULL)
    869         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
    870 
    871     if ((info != NULL) && (info->empty)) {
    872         xmlOutputBufferWriteString(buf, ">");
    873 	if ((format) && (!info->isinline) && (cur->next != NULL)) {
    874 	    if ((cur->next->type != HTML_TEXT_NODE) &&
    875 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
    876 		(cur->parent != NULL) &&
    877 		(cur->parent->name != NULL) &&
    878 		(cur->parent->name[0] != 'p')) /* p, pre, param */
    879 		xmlOutputBufferWriteString(buf, "\n");
    880 	}
    881 	return;
    882     }
    883     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
    884 	(cur->children == NULL)) {
    885         if ((info != NULL) && (info->saveEndTag != 0) &&
    886 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
    887 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
    888 	    xmlOutputBufferWriteString(buf, ">");
    889 	} else {
    890 	    xmlOutputBufferWriteString(buf, "></");
    891             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    892                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    893                 xmlOutputBufferWriteString(buf, ":");
    894             }
    895 	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
    896 	    xmlOutputBufferWriteString(buf, ">");
    897 	}
    898 	if ((format) && (cur->next != NULL) &&
    899             (info != NULL) && (!info->isinline)) {
    900 	    if ((cur->next->type != HTML_TEXT_NODE) &&
    901 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
    902 		(cur->parent != NULL) &&
    903 		(cur->parent->name != NULL) &&
    904 		(cur->parent->name[0] != 'p')) /* p, pre, param */
    905 		xmlOutputBufferWriteString(buf, "\n");
    906 	}
    907 	return;
    908     }
    909     xmlOutputBufferWriteString(buf, ">");
    910     if ((cur->type != XML_ELEMENT_NODE) &&
    911 	(cur->content != NULL)) {
    912 	    /*
    913 	     * Uses the OutputBuffer property to automatically convert
    914 	     * invalids to charrefs
    915 	     */
    916 
    917             xmlOutputBufferWriteString(buf, (const char *) cur->content);
    918     }
    919     if (cur->children != NULL) {
    920         if ((format) && (info != NULL) && (!info->isinline) &&
    921 	    (cur->children->type != HTML_TEXT_NODE) &&
    922 	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
    923 	    (cur->children != cur->last) &&
    924 	    (cur->name != NULL) &&
    925 	    (cur->name[0] != 'p')) /* p, pre, param */
    926 	    xmlOutputBufferWriteString(buf, "\n");
    927 	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
    928         if ((format) && (info != NULL) && (!info->isinline) &&
    929 	    (cur->last->type != HTML_TEXT_NODE) &&
    930 	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
    931 	    (cur->children != cur->last) &&
    932 	    (cur->name != NULL) &&
    933 	    (cur->name[0] != 'p')) /* p, pre, param */
    934 	    xmlOutputBufferWriteString(buf, "\n");
    935     }
    936     xmlOutputBufferWriteString(buf, "</");
    937     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    938         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    939 	xmlOutputBufferWriteString(buf, ":");
    940     }
    941     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    942     xmlOutputBufferWriteString(buf, ">");
    943     if ((format) && (info != NULL) && (!info->isinline) &&
    944 	(cur->next != NULL)) {
    945         if ((cur->next->type != HTML_TEXT_NODE) &&
    946 	    (cur->next->type != HTML_ENTITY_REF_NODE) &&
    947 	    (cur->parent != NULL) &&
    948 	    (cur->parent->name != NULL) &&
    949 	    (cur->parent->name[0] != 'p')) /* p, pre, param */
    950 	    xmlOutputBufferWriteString(buf, "\n");
    951     }
    952 }
    953 
    954 /**
    955  * htmlNodeDumpOutput:
    956  * @buf:  the HTML buffer output
    957  * @doc:  the document
    958  * @cur:  the current node
    959  * @encoding:  the encoding string
    960  *
    961  * Dump an HTML node, recursive behaviour,children are printed too,
    962  * and formatting returns/spaces are added.
    963  */
    964 void
    965 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    966 	           xmlNodePtr cur, const char *encoding) {
    967     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
    968 }
    969 
    970 /**
    971  * htmlDocContentDumpFormatOutput:
    972  * @buf:  the HTML buffer output
    973  * @cur:  the document
    974  * @encoding:  the encoding string
    975  * @format:  should formatting spaces been added
    976  *
    977  * Dump an HTML document.
    978  */
    979 void
    980 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
    981 	                       const char *encoding, int format) {
    982     int type;
    983 
    984     xmlInitParser();
    985 
    986     if ((buf == NULL) || (cur == NULL))
    987         return;
    988 
    989     /*
    990      * force to output the stuff as HTML, especially for entities
    991      */
    992     type = cur->type;
    993     cur->type = XML_HTML_DOCUMENT_NODE;
    994     if (cur->intSubset != NULL) {
    995         htmlDtdDumpOutput(buf, cur, NULL);
    996     }
    997     if (cur->children != NULL) {
    998         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
    999     }
   1000     xmlOutputBufferWriteString(buf, "\n");
   1001     cur->type = (xmlElementType) type;
   1002 }
   1003 
   1004 /**
   1005  * htmlDocContentDumpOutput:
   1006  * @buf:  the HTML buffer output
   1007  * @cur:  the document
   1008  * @encoding:  the encoding string
   1009  *
   1010  * Dump an HTML document. Formating return/spaces are added.
   1011  */
   1012 void
   1013 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
   1014 	                 const char *encoding) {
   1015     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
   1016 }
   1017 
   1018 /************************************************************************
   1019  *									*
   1020  *		Saving functions front-ends				*
   1021  *									*
   1022  ************************************************************************/
   1023 
   1024 /**
   1025  * htmlDocDump:
   1026  * @f:  the FILE*
   1027  * @cur:  the document
   1028  *
   1029  * Dump an HTML document to an open FILE.
   1030  *
   1031  * returns: the number of byte written or -1 in case of failure.
   1032  */
   1033 int
   1034 htmlDocDump(FILE *f, xmlDocPtr cur) {
   1035     xmlOutputBufferPtr buf;
   1036     xmlCharEncodingHandlerPtr handler = NULL;
   1037     const char *encoding;
   1038     int ret;
   1039 
   1040     xmlInitParser();
   1041 
   1042     if ((cur == NULL) || (f == NULL)) {
   1043 	return(-1);
   1044     }
   1045 
   1046     encoding = (const char *) htmlGetMetaEncoding(cur);
   1047 
   1048     if (encoding != NULL) {
   1049 	xmlCharEncoding enc;
   1050 
   1051 	enc = xmlParseCharEncoding(encoding);
   1052 	if (enc != cur->charset) {
   1053 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1054 		/*
   1055 		 * Not supported yet
   1056 		 */
   1057 		return(-1);
   1058 	    }
   1059 
   1060 	    handler = xmlFindCharEncodingHandler(encoding);
   1061 	    if (handler == NULL)
   1062 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
   1063 	} else {
   1064 	    handler = xmlFindCharEncodingHandler(encoding);
   1065 	}
   1066     }
   1067 
   1068     /*
   1069      * Fallback to HTML or ASCII when the encoding is unspecified
   1070      */
   1071     if (handler == NULL)
   1072 	handler = xmlFindCharEncodingHandler("HTML");
   1073     if (handler == NULL)
   1074 	handler = xmlFindCharEncodingHandler("ascii");
   1075 
   1076     buf = xmlOutputBufferCreateFile(f, handler);
   1077     if (buf == NULL) return(-1);
   1078     htmlDocContentDumpOutput(buf, cur, NULL);
   1079 
   1080     ret = xmlOutputBufferClose(buf);
   1081     return(ret);
   1082 }
   1083 
   1084 /**
   1085  * htmlSaveFile:
   1086  * @filename:  the filename (or URL)
   1087  * @cur:  the document
   1088  *
   1089  * Dump an HTML document to a file. If @filename is "-" the stdout file is
   1090  * used.
   1091  * returns: the number of byte written or -1 in case of failure.
   1092  */
   1093 int
   1094 htmlSaveFile(const char *filename, xmlDocPtr cur) {
   1095     xmlOutputBufferPtr buf;
   1096     xmlCharEncodingHandlerPtr handler = NULL;
   1097     const char *encoding;
   1098     int ret;
   1099 
   1100     if ((cur == NULL) || (filename == NULL))
   1101         return(-1);
   1102 
   1103     xmlInitParser();
   1104 
   1105     encoding = (const char *) htmlGetMetaEncoding(cur);
   1106 
   1107     if (encoding != NULL) {
   1108 	xmlCharEncoding enc;
   1109 
   1110 	enc = xmlParseCharEncoding(encoding);
   1111 	if (enc != cur->charset) {
   1112 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1113 		/*
   1114 		 * Not supported yet
   1115 		 */
   1116 		return(-1);
   1117 	    }
   1118 
   1119 	    handler = xmlFindCharEncodingHandler(encoding);
   1120 	    if (handler == NULL)
   1121 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
   1122 	}
   1123     }
   1124 
   1125     /*
   1126      * Fallback to HTML or ASCII when the encoding is unspecified
   1127      */
   1128     if (handler == NULL)
   1129 	handler = xmlFindCharEncodingHandler("HTML");
   1130     if (handler == NULL)
   1131 	handler = xmlFindCharEncodingHandler("ascii");
   1132 
   1133     /*
   1134      * save the content to a temp buffer.
   1135      */
   1136     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
   1137     if (buf == NULL) return(0);
   1138 
   1139     htmlDocContentDumpOutput(buf, cur, NULL);
   1140 
   1141     ret = xmlOutputBufferClose(buf);
   1142     return(ret);
   1143 }
   1144 
   1145 /**
   1146  * htmlSaveFileFormat:
   1147  * @filename:  the filename
   1148  * @cur:  the document
   1149  * @format:  should formatting spaces been added
   1150  * @encoding: the document encoding
   1151  *
   1152  * Dump an HTML document to a file using a given encoding.
   1153  *
   1154  * returns: the number of byte written or -1 in case of failure.
   1155  */
   1156 int
   1157 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
   1158 	           const char *encoding, int format) {
   1159     xmlOutputBufferPtr buf;
   1160     xmlCharEncodingHandlerPtr handler = NULL;
   1161     int ret;
   1162 
   1163     if ((cur == NULL) || (filename == NULL))
   1164         return(-1);
   1165 
   1166     xmlInitParser();
   1167 
   1168     if (encoding != NULL) {
   1169 	xmlCharEncoding enc;
   1170 
   1171 	enc = xmlParseCharEncoding(encoding);
   1172 	if (enc != cur->charset) {
   1173 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1174 		/*
   1175 		 * Not supported yet
   1176 		 */
   1177 		return(-1);
   1178 	    }
   1179 
   1180 	    handler = xmlFindCharEncodingHandler(encoding);
   1181 	    if (handler == NULL)
   1182 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
   1183 	}
   1184         htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
   1185     } else {
   1186 	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
   1187     }
   1188 
   1189     /*
   1190      * Fallback to HTML or ASCII when the encoding is unspecified
   1191      */
   1192     if (handler == NULL)
   1193 	handler = xmlFindCharEncodingHandler("HTML");
   1194     if (handler == NULL)
   1195 	handler = xmlFindCharEncodingHandler("ascii");
   1196 
   1197     /*
   1198      * save the content to a temp buffer.
   1199      */
   1200     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
   1201     if (buf == NULL) return(0);
   1202 
   1203     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
   1204 
   1205     ret = xmlOutputBufferClose(buf);
   1206     return(ret);
   1207 }
   1208 
   1209 /**
   1210  * htmlSaveFileEnc:
   1211  * @filename:  the filename
   1212  * @cur:  the document
   1213  * @encoding: the document encoding
   1214  *
   1215  * Dump an HTML document to a file using a given encoding
   1216  * and formatting returns/spaces are added.
   1217  *
   1218  * returns: the number of byte written or -1 in case of failure.
   1219  */
   1220 int
   1221 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
   1222     return(htmlSaveFileFormat(filename, cur, encoding, 1));
   1223 }
   1224 
   1225 #endif /* LIBXML_OUTPUT_ENABLED */
   1226 
   1227 #define bottom_HTMLtree
   1228 #include "elfgcchack.h"
   1229 #endif /* LIBXML_HTML_ENABLED */
   1230