Home | History | Annotate | Download | only in libxml2
      1 /*
      2  * HTMLtree.c : implementation of access function for an HTML tree.
      3  *
      4  * See Copyright for the status of this software.
      5  *
      6  * daniel (at) veillard.com
      7  */
      8 
      9 
     10 #define IN_LIBXML
     11 #include "libxml.h"
     12 #ifdef LIBXML_HTML_ENABLED
     13 
     14 #include <string.h> /* for memset() only ! */
     15 
     16 #ifdef HAVE_CTYPE_H
     17 #include <ctype.h>
     18 #endif
     19 #ifdef HAVE_STDLIB_H
     20 #include <stdlib.h>
     21 #endif
     22 
     23 #include <libxml/xmlmemory.h>
     24 #include <libxml/HTMLparser.h>
     25 #include <libxml/HTMLtree.h>
     26 #include <libxml/entities.h>
     27 #include <libxml/valid.h>
     28 #include <libxml/xmlerror.h>
     29 #include <libxml/parserInternals.h>
     30 #include <libxml/globals.h>
     31 #include <libxml/uri.h>
     32 
     33 /************************************************************************
     34  *									*
     35  *   		Getting/Setting encoding meta tags			*
     36  *									*
     37  ************************************************************************/
     38 
     39 /**
     40  * htmlGetMetaEncoding:
     41  * @doc:  the document
     42  *
     43  * Encoding definition lookup in the Meta tags
     44  *
     45  * Returns the current encoding as flagged in the HTML source
     46  */
     47 const xmlChar *
     48 htmlGetMetaEncoding(htmlDocPtr doc) {
     49     htmlNodePtr cur;
     50     const xmlChar *content;
     51     const xmlChar *encoding;
     52 
     53     if (doc == NULL)
     54 	return(NULL);
     55     cur = doc->children;
     56 
     57     /*
     58      * Search the html
     59      */
     60     while (cur != NULL) {
     61 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     62 	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
     63 		break;
     64 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
     65 		goto found_head;
     66 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
     67 		goto found_meta;
     68 	}
     69 	cur = cur->next;
     70     }
     71     if (cur == NULL)
     72 	return(NULL);
     73     cur = cur->children;
     74 
     75     /*
     76      * Search the head
     77      */
     78     while (cur != NULL) {
     79 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     80 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
     81 		break;
     82 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
     83 		goto found_meta;
     84 	}
     85 	cur = cur->next;
     86     }
     87     if (cur == NULL)
     88 	return(NULL);
     89 found_head:
     90     cur = cur->children;
     91 
     92     /*
     93      * Search the meta elements
     94      */
     95 found_meta:
     96     while (cur != NULL) {
     97 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     98 	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
     99 		xmlAttrPtr attr = cur->properties;
    100 		int http;
    101 		const xmlChar *value;
    102 
    103 		content = NULL;
    104 		http = 0;
    105 		while (attr != NULL) {
    106 		    if ((attr->children != NULL) &&
    107 		        (attr->children->type == XML_TEXT_NODE) &&
    108 		        (attr->children->next == NULL)) {
    109 			value = attr->children->content;
    110 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
    111 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
    112 			    http = 1;
    113 			else if ((value != NULL)
    114 			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
    115 			    content = value;
    116 			if ((http != 0) && (content != NULL))
    117 			    goto found_content;
    118 		    }
    119 		    attr = attr->next;
    120 		}
    121 	    }
    122 	}
    123 	cur = cur->next;
    124     }
    125     return(NULL);
    126 
    127 found_content:
    128     encoding = xmlStrstr(content, BAD_CAST"charset=");
    129     if (encoding == NULL)
    130 	encoding = xmlStrstr(content, BAD_CAST"Charset=");
    131     if (encoding == NULL)
    132 	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
    133     if (encoding != NULL) {
    134 	encoding += 8;
    135     } else {
    136 	encoding = xmlStrstr(content, BAD_CAST"charset =");
    137 	if (encoding == NULL)
    138 	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
    139 	if (encoding == NULL)
    140 	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
    141 	if (encoding != NULL)
    142 	    encoding += 9;
    143     }
    144     if (encoding != NULL) {
    145 	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
    146     }
    147     return(encoding);
    148 }
    149 
    150 /**
    151  * htmlSetMetaEncoding:
    152  * @doc:  the document
    153  * @encoding:  the encoding string
    154  *
    155  * Sets the current encoding in the Meta tags
    156  * NOTE: this will not change the document content encoding, just
    157  * the META flag associated.
    158  *
    159  * Returns 0 in case of success and -1 in case of error
    160  */
    161 int
    162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
    163     htmlNodePtr cur, meta;
    164     const xmlChar *content;
    165     char newcontent[100];
    166 
    167 
    168     if (doc == NULL)
    169 	return(-1);
    170 
    171     if (encoding != NULL) {
    172 	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
    173                 (char *)encoding);
    174 	newcontent[sizeof(newcontent) - 1] = 0;
    175     }
    176 
    177     cur = doc->children;
    178 
    179     /*
    180      * Search the html
    181      */
    182     while (cur != NULL) {
    183 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    184 	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
    185 		break;
    186 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
    187 		goto found_head;
    188 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
    189 		goto found_meta;
    190 	}
    191 	cur = cur->next;
    192     }
    193     if (cur == NULL)
    194 	return(-1);
    195     cur = cur->children;
    196 
    197     /*
    198      * Search the head
    199      */
    200     while (cur != NULL) {
    201 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    202 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
    203 		break;
    204 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
    205 		goto found_meta;
    206 	}
    207 	cur = cur->next;
    208     }
    209     if (cur == NULL)
    210 	return(-1);
    211 found_head:
    212     if (cur->children == NULL) {
    213 	if (encoding == NULL)
    214 	    return(0);
    215 	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
    216 	xmlAddChild(cur, meta);
    217 	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
    218 	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
    219 	return(0);
    220     }
    221     cur = cur->children;
    222 
    223 found_meta:
    224     if (encoding != NULL) {
    225 	/*
    226 	 * Create a new Meta element with the right attributes
    227 	 */
    228 
    229 	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
    230 	xmlAddPrevSibling(cur, meta);
    231 	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
    232 	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
    233     }
    234 
    235     /*
    236      * Search and destroy all the remaining the meta elements carrying
    237      * encoding informations
    238      */
    239     while (cur != NULL) {
    240 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    241 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
    242 		xmlAttrPtr attr = cur->properties;
    243 		int http;
    244 		const xmlChar *value;
    245 
    246 		content = NULL;
    247 		http = 0;
    248 		while (attr != NULL) {
    249 		    if ((attr->children != NULL) &&
    250 		        (attr->children->type == XML_TEXT_NODE) &&
    251 		        (attr->children->next == NULL)) {
    252 			value = attr->children->content;
    253 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
    254 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
    255 			    http = 1;
    256 			else
    257                         {
    258                            if ((value != NULL) &&
    259 				(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
    260 			      content = value;
    261                         }
    262 		        if ((http != 0) && (content != NULL))
    263 			    break;
    264 		    }
    265 		    attr = attr->next;
    266 		}
    267 		if ((http != 0) && (content != NULL)) {
    268 		    meta = cur;
    269 		    cur = cur->next;
    270 		    xmlUnlinkNode(meta);
    271                     xmlFreeNode(meta);
    272 		    continue;
    273 		}
    274 
    275 	    }
    276 	}
    277 	cur = cur->next;
    278     }
    279     return(0);
    280 }
    281 
    282 /**
    283  * booleanHTMLAttrs:
    284  *
    285  * These are the HTML attributes which will be output
    286  * in minimized form, i.e. <option selected="selected"> will be
    287  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
    288  *
    289  */
    290 static const char* htmlBooleanAttrs[] = {
    291   "checked", "compact", "declare", "defer", "disabled", "ismap",
    292   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    293   "selected", NULL
    294 };
    295 
    296 
    297 /**
    298  * htmlIsBooleanAttr:
    299  * @name:  the name of the attribute to check
    300  *
    301  * Determine if a given attribute is a boolean attribute.
    302  *
    303  * returns: false if the attribute is not boolean, true otherwise.
    304  */
    305 int
    306 htmlIsBooleanAttr(const xmlChar *name)
    307 {
    308     int i = 0;
    309 
    310     while (htmlBooleanAttrs[i] != NULL) {
    311         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
    312             return 1;
    313         i++;
    314     }
    315     return 0;
    316 }
    317 
    318 #ifdef LIBXML_OUTPUT_ENABLED
    319 /*
    320  * private routine exported from xmlIO.c
    321  */
    322 xmlOutputBufferPtr
    323 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
    324 /************************************************************************
    325  *									*
    326  * 			Output error handlers				*
    327  *									*
    328  ************************************************************************/
    329 /**
    330  * htmlSaveErrMemory:
    331  * @extra:  extra informations
    332  *
    333  * Handle an out of memory condition
    334  */
    335 static void
    336 htmlSaveErrMemory(const char *extra)
    337 {
    338     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
    339 }
    340 
    341 /**
    342  * htmlSaveErr:
    343  * @code:  the error number
    344  * @node:  the location of the error.
    345  * @extra:  extra informations
    346  *
    347  * Handle an out of memory condition
    348  */
    349 static void
    350 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
    351 {
    352     const char *msg = NULL;
    353 
    354     switch(code) {
    355         case XML_SAVE_NOT_UTF8:
    356 	    msg = "string is not in UTF-8\n";
    357 	    break;
    358 	case XML_SAVE_CHAR_INVALID:
    359 	    msg = "invalid character value\n";
    360 	    break;
    361 	case XML_SAVE_UNKNOWN_ENCODING:
    362 	    msg = "unknown encoding %s\n";
    363 	    break;
    364 	case XML_SAVE_NO_DOCTYPE:
    365 	    msg = "HTML has no DOCTYPE\n";
    366 	    break;
    367 	default:
    368 	    msg = "unexpected error number\n";
    369     }
    370     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
    371 }
    372 
    373 /************************************************************************
    374  *									*
    375  *   		Dumping HTML tree content to a simple buffer		*
    376  *									*
    377  ************************************************************************/
    378 
    379 static int
    380 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
    381 	           int format);
    382 
    383 /**
    384  * htmlNodeDumpFormat:
    385  * @buf:  the HTML buffer output
    386  * @doc:  the document
    387  * @cur:  the current node
    388  * @format:  should formatting spaces been added
    389  *
    390  * Dump an HTML node, recursive behaviour,children are printed too.
    391  *
    392  * Returns the number of byte written or -1 in case of error
    393  */
    394 static int
    395 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
    396 	           int format) {
    397     unsigned int use;
    398     int ret;
    399     xmlOutputBufferPtr outbuf;
    400 
    401     if (cur == NULL) {
    402 	return (-1);
    403     }
    404     if (buf == NULL) {
    405 	return (-1);
    406     }
    407     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
    408     if (outbuf == NULL) {
    409         htmlSaveErrMemory("allocating HTML output buffer");
    410 	return (-1);
    411     }
    412     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
    413     outbuf->buffer = buf;
    414     outbuf->encoder = NULL;
    415     outbuf->writecallback = NULL;
    416     outbuf->closecallback = NULL;
    417     outbuf->context = NULL;
    418     outbuf->written = 0;
    419 
    420     use = buf->use;
    421     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
    422     xmlFree(outbuf);
    423     ret = buf->use - use;
    424     return (ret);
    425 }
    426 
    427 /**
    428  * htmlNodeDump:
    429  * @buf:  the HTML buffer output
    430  * @doc:  the document
    431  * @cur:  the current node
    432  *
    433  * Dump an HTML node, recursive behaviour,children are printed too,
    434  * and formatting returns are added.
    435  *
    436  * Returns the number of byte written or -1 in case of error
    437  */
    438 int
    439 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
    440     xmlInitParser();
    441 
    442     return(htmlNodeDumpFormat(buf, doc, cur, 1));
    443 }
    444 
    445 /**
    446  * htmlNodeDumpFileFormat:
    447  * @out:  the FILE pointer
    448  * @doc:  the document
    449  * @cur:  the current node
    450  * @encoding: the document encoding
    451  * @format:  should formatting spaces been added
    452  *
    453  * Dump an HTML node, recursive behaviour,children are printed too.
    454  *
    455  * TODO: if encoding == NULL try to save in the doc encoding
    456  *
    457  * returns: the number of byte written or -1 in case of failure.
    458  */
    459 int
    460 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
    461 	               xmlNodePtr cur, const char *encoding, int format) {
    462     xmlOutputBufferPtr buf;
    463     xmlCharEncodingHandlerPtr handler = NULL;
    464     int ret;
    465 
    466     xmlInitParser();
    467 
    468     if (encoding != NULL) {
    469 	xmlCharEncoding enc;
    470 
    471 	enc = xmlParseCharEncoding(encoding);
    472 	if (enc != XML_CHAR_ENCODING_UTF8) {
    473 	    handler = xmlFindCharEncodingHandler(encoding);
    474 	    if (handler == NULL)
    475 		return(-1);
    476 	}
    477     }
    478 
    479     /*
    480      * Fallback to HTML or ASCII when the encoding is unspecified
    481      */
    482     if (handler == NULL)
    483 	handler = xmlFindCharEncodingHandler("HTML");
    484     if (handler == NULL)
    485 	handler = xmlFindCharEncodingHandler("ascii");
    486 
    487     /*
    488      * save the content to a temp buffer.
    489      */
    490     buf = xmlOutputBufferCreateFile(out, handler);
    491     if (buf == NULL) return(0);
    492 
    493     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
    494 
    495     ret = xmlOutputBufferClose(buf);
    496     return(ret);
    497 }
    498 
    499 /**
    500  * htmlNodeDumpFile:
    501  * @out:  the FILE pointer
    502  * @doc:  the document
    503  * @cur:  the current node
    504  *
    505  * Dump an HTML node, recursive behaviour,children are printed too,
    506  * and formatting returns are added.
    507  */
    508 void
    509 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    510     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
    511 }
    512 
    513 /**
    514  * htmlDocDumpMemoryFormat:
    515  * @cur:  the document
    516  * @mem:  OUT: the memory pointer
    517  * @size:  OUT: the memory length
    518  * @format:  should formatting spaces been added
    519  *
    520  * Dump an HTML document in memory and return the xmlChar * and it's size.
    521  * It's up to the caller to free the memory.
    522  */
    523 void
    524 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
    525     xmlOutputBufferPtr buf;
    526     xmlCharEncodingHandlerPtr handler = NULL;
    527     const char *encoding;
    528 
    529     xmlInitParser();
    530 
    531     if ((mem == NULL) || (size == NULL))
    532         return;
    533     if (cur == NULL) {
    534 	*mem = NULL;
    535 	*size = 0;
    536 	return;
    537     }
    538 
    539     encoding = (const char *) htmlGetMetaEncoding(cur);
    540 
    541     if (encoding != NULL) {
    542 	xmlCharEncoding enc;
    543 
    544 	enc = xmlParseCharEncoding(encoding);
    545 	if (enc != cur->charset) {
    546 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
    547 		/*
    548 		 * Not supported yet
    549 		 */
    550 		*mem = NULL;
    551 		*size = 0;
    552 		return;
    553 	    }
    554 
    555 	    handler = xmlFindCharEncodingHandler(encoding);
    556 	    if (handler == NULL) {
    557 		*mem = NULL;
    558 		*size = 0;
    559 		return;
    560 	    }
    561 	} else {
    562 	    handler = xmlFindCharEncodingHandler(encoding);
    563 	}
    564     }
    565 
    566     /*
    567      * Fallback to HTML or ASCII when the encoding is unspecified
    568      */
    569     if (handler == NULL)
    570 	handler = xmlFindCharEncodingHandler("HTML");
    571     if (handler == NULL)
    572 	handler = xmlFindCharEncodingHandler("ascii");
    573 
    574     buf = xmlAllocOutputBufferInternal(handler);
    575     if (buf == NULL) {
    576 	*mem = NULL;
    577 	*size = 0;
    578 	return;
    579     }
    580 
    581 	htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
    582 
    583     xmlOutputBufferFlush(buf);
    584     if (buf->conv != NULL) {
    585 	*size = buf->conv->use;
    586 	*mem = xmlStrndup(buf->conv->content, *size);
    587     } else {
    588 	*size = buf->buffer->use;
    589 	*mem = xmlStrndup(buf->buffer->content, *size);
    590     }
    591     (void)xmlOutputBufferClose(buf);
    592 }
    593 
    594 /**
    595  * htmlDocDumpMemory:
    596  * @cur:  the document
    597  * @mem:  OUT: the memory pointer
    598  * @size:  OUT: the memory length
    599  *
    600  * Dump an HTML document in memory and return the xmlChar * and it's size.
    601  * It's up to the caller to free the memory.
    602  */
    603 void
    604 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
    605 	htmlDocDumpMemoryFormat(cur, mem, size, 1);
    606 }
    607 
    608 
    609 /************************************************************************
    610  *									*
    611  *   		Dumping HTML tree content to an I/O output buffer	*
    612  *									*
    613  ************************************************************************/
    614 
    615 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
    616 
    617 /**
    618  * htmlDtdDumpOutput:
    619  * @buf:  the HTML buffer output
    620  * @doc:  the document
    621  * @encoding:  the encoding string
    622  *
    623  * TODO: check whether encoding is needed
    624  *
    625  * Dump the HTML document DTD, if any.
    626  */
    627 static void
    628 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    629 	          const char *encoding ATTRIBUTE_UNUSED) {
    630     xmlDtdPtr cur = doc->intSubset;
    631 
    632     if (cur == NULL) {
    633 	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
    634 	return;
    635     }
    636     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
    637     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    638     if (cur->ExternalID != NULL) {
    639 	xmlOutputBufferWriteString(buf, " PUBLIC ");
    640 	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
    641 	if (cur->SystemID != NULL) {
    642 	    xmlOutputBufferWriteString(buf, " ");
    643 	    xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
    644 	}
    645     }  else if (cur->SystemID != NULL) {
    646 	xmlOutputBufferWriteString(buf, " SYSTEM ");
    647 	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
    648     }
    649     xmlOutputBufferWriteString(buf, ">\n");
    650 }
    651 
    652 /**
    653  * htmlAttrDumpOutput:
    654  * @buf:  the HTML buffer output
    655  * @doc:  the document
    656  * @cur:  the attribute pointer
    657  * @encoding:  the encoding string
    658  *
    659  * Dump an HTML attribute
    660  */
    661 static void
    662 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
    663 	           const char *encoding ATTRIBUTE_UNUSED) {
    664     xmlChar *value;
    665 
    666     /*
    667      * TODO: The html output method should not escape a & character
    668      *       occurring in an attribute value immediately followed by
    669      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
    670      */
    671 
    672     if (cur == NULL) {
    673 	return;
    674     }
    675     xmlOutputBufferWriteString(buf, " ");
    676     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    677         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    678 	xmlOutputBufferWriteString(buf, ":");
    679     }
    680     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    681     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
    682 	value = xmlNodeListGetString(doc, cur->children, 0);
    683 	if (value) {
    684 	    xmlOutputBufferWriteString(buf, "=");
    685 	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
    686 		(cur->parent->ns == NULL) &&
    687 		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
    688 	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
    689 		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
    690 		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
    691 		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
    692 		xmlChar *escaped;
    693 		xmlChar *tmp = value;
    694 
    695 		while (IS_BLANK_CH(*tmp)) tmp++;
    696 
    697 		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
    698 		if (escaped != NULL) {
    699 		    xmlBufferWriteQuotedString(buf->buffer, escaped);
    700 		    xmlFree(escaped);
    701 		} else {
    702 		    xmlBufferWriteQuotedString(buf->buffer, value);
    703 		}
    704 	    } else {
    705 		xmlBufferWriteQuotedString(buf->buffer, value);
    706 	    }
    707 	    xmlFree(value);
    708 	} else  {
    709 	    xmlOutputBufferWriteString(buf, "=\"\"");
    710 	}
    711     }
    712 }
    713 
    714 /**
    715  * htmlAttrListDumpOutput:
    716  * @buf:  the HTML buffer output
    717  * @doc:  the document
    718  * @cur:  the first attribute pointer
    719  * @encoding:  the encoding string
    720  *
    721  * Dump a list of HTML attributes
    722  */
    723 static void
    724 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
    725     if (cur == NULL) {
    726 	return;
    727     }
    728     while (cur != NULL) {
    729         htmlAttrDumpOutput(buf, doc, cur, encoding);
    730 	cur = cur->next;
    731     }
    732 }
    733 
    734 
    735 
    736 /**
    737  * htmlNodeListDumpOutput:
    738  * @buf:  the HTML buffer output
    739  * @doc:  the document
    740  * @cur:  the first node
    741  * @encoding:  the encoding string
    742  * @format:  should formatting spaces been added
    743  *
    744  * Dump an HTML node list, recursive behaviour,children are printed too.
    745  */
    746 static void
    747 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    748 	               xmlNodePtr cur, const char *encoding, int format) {
    749     if (cur == NULL) {
    750 	return;
    751     }
    752     while (cur != NULL) {
    753         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
    754 	cur = cur->next;
    755     }
    756 }
    757 
    758 /**
    759  * htmlNodeDumpFormatOutput:
    760  * @buf:  the HTML buffer output
    761  * @doc:  the document
    762  * @cur:  the current node
    763  * @encoding:  the encoding string
    764  * @format:  should formatting spaces been added
    765  *
    766  * Dump an HTML node, recursive behaviour,children are printed too.
    767  */
    768 void
    769 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    770 	                 xmlNodePtr cur, const char *encoding, int format) {
    771     const htmlElemDesc * info;
    772 
    773     xmlInitParser();
    774 
    775     if ((cur == NULL) || (buf == NULL)) {
    776 	return;
    777     }
    778     /*
    779      * Special cases.
    780      */
    781     if (cur->type == XML_DTD_NODE)
    782 	return;
    783     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
    784         (cur->type == XML_DOCUMENT_NODE)){
    785 	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
    786 	return;
    787     }
    788     if (cur->type == XML_ATTRIBUTE_NODE) {
    789         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
    790 	return;
    791     }
    792     if (cur->type == HTML_TEXT_NODE) {
    793 	if (cur->content != NULL) {
    794 	    if (((cur->name == (const xmlChar *)xmlStringText) ||
    795 		 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
    796 		((cur->parent == NULL) ||
    797 		 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
    798 		  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
    799 		xmlChar *buffer;
    800 
    801 		buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
    802 		if (buffer != NULL) {
    803 		    xmlOutputBufferWriteString(buf, (const char *)buffer);
    804 		    xmlFree(buffer);
    805 		}
    806 	    } else {
    807 		xmlOutputBufferWriteString(buf, (const char *)cur->content);
    808 	    }
    809 	}
    810 	return;
    811     }
    812     if (cur->type == HTML_COMMENT_NODE) {
    813 	if (cur->content != NULL) {
    814 	    xmlOutputBufferWriteString(buf, "<!--");
    815 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    816 	    xmlOutputBufferWriteString(buf, "-->");
    817 	}
    818 	return;
    819     }
    820     if (cur->type == HTML_PI_NODE) {
    821 	if (cur->name == NULL)
    822 	    return;
    823 	xmlOutputBufferWriteString(buf, "<?");
    824 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
    825 	if (cur->content != NULL) {
    826 	    xmlOutputBufferWriteString(buf, " ");
    827 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    828 	}
    829 	xmlOutputBufferWriteString(buf, ">");
    830 	return;
    831     }
    832     if (cur->type == HTML_ENTITY_REF_NODE) {
    833         xmlOutputBufferWriteString(buf, "&");
    834 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
    835         xmlOutputBufferWriteString(buf, ";");
    836 	return;
    837     }
    838     if (cur->type == HTML_PRESERVE_NODE) {
    839 	if (cur->content != NULL) {
    840 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    841 	}
    842 	return;
    843     }
    844 
    845     /*
    846      * Get specific HTML info for that node.
    847      */
    848     if (cur->ns == NULL)
    849 	info = htmlTagLookup(cur->name);
    850     else
    851 	info = NULL;
    852 
    853     xmlOutputBufferWriteString(buf, "<");
    854     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    855         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    856 	xmlOutputBufferWriteString(buf, ":");
    857     }
    858     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    859     if (cur->nsDef)
    860 	xmlNsListDumpOutput(buf, cur->nsDef);
    861     if (cur->properties != NULL)
    862         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
    863 
    864     if ((info != NULL) && (info->empty)) {
    865         xmlOutputBufferWriteString(buf, ">");
    866 	if ((format) && (!info->isinline) && (cur->next != NULL)) {
    867 	    if ((cur->next->type != HTML_TEXT_NODE) &&
    868 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
    869 		(cur->parent != NULL) &&
    870 		(cur->parent->name != NULL) &&
    871 		(cur->parent->name[0] != 'p')) /* p, pre, param */
    872 		xmlOutputBufferWriteString(buf, "\n");
    873 	}
    874 	return;
    875     }
    876     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
    877 	(cur->children == NULL)) {
    878         if ((info != NULL) && (info->saveEndTag != 0) &&
    879 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
    880 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
    881 	    xmlOutputBufferWriteString(buf, ">");
    882 	} else {
    883 	    xmlOutputBufferWriteString(buf, "></");
    884             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    885                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    886                 xmlOutputBufferWriteString(buf, ":");
    887             }
    888 	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
    889 	    xmlOutputBufferWriteString(buf, ">");
    890 	}
    891 	if ((format) && (cur->next != NULL) &&
    892             (info != NULL) && (!info->isinline)) {
    893 	    if ((cur->next->type != HTML_TEXT_NODE) &&
    894 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
    895 		(cur->parent != NULL) &&
    896 		(cur->parent->name != NULL) &&
    897 		(cur->parent->name[0] != 'p')) /* p, pre, param */
    898 		xmlOutputBufferWriteString(buf, "\n");
    899 	}
    900 	return;
    901     }
    902     xmlOutputBufferWriteString(buf, ">");
    903     if ((cur->type != XML_ELEMENT_NODE) &&
    904 	(cur->content != NULL)) {
    905 	    /*
    906 	     * Uses the OutputBuffer property to automatically convert
    907 	     * invalids to charrefs
    908 	     */
    909 
    910             xmlOutputBufferWriteString(buf, (const char *) cur->content);
    911     }
    912     if (cur->children != NULL) {
    913         if ((format) && (info != NULL) && (!info->isinline) &&
    914 	    (cur->children->type != HTML_TEXT_NODE) &&
    915 	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
    916 	    (cur->children != cur->last) &&
    917 	    (cur->name != NULL) &&
    918 	    (cur->name[0] != 'p')) /* p, pre, param */
    919 	    xmlOutputBufferWriteString(buf, "\n");
    920 	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
    921         if ((format) && (info != NULL) && (!info->isinline) &&
    922 	    (cur->last->type != HTML_TEXT_NODE) &&
    923 	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
    924 	    (cur->children != cur->last) &&
    925 	    (cur->name != NULL) &&
    926 	    (cur->name[0] != 'p')) /* p, pre, param */
    927 	    xmlOutputBufferWriteString(buf, "\n");
    928     }
    929     xmlOutputBufferWriteString(buf, "</");
    930     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    931         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    932 	xmlOutputBufferWriteString(buf, ":");
    933     }
    934     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    935     xmlOutputBufferWriteString(buf, ">");
    936     if ((format) && (info != NULL) && (!info->isinline) &&
    937 	(cur->next != NULL)) {
    938         if ((cur->next->type != HTML_TEXT_NODE) &&
    939 	    (cur->next->type != HTML_ENTITY_REF_NODE) &&
    940 	    (cur->parent != NULL) &&
    941 	    (cur->parent->name != NULL) &&
    942 	    (cur->parent->name[0] != 'p')) /* p, pre, param */
    943 	    xmlOutputBufferWriteString(buf, "\n");
    944     }
    945 }
    946 
    947 /**
    948  * htmlNodeDumpOutput:
    949  * @buf:  the HTML buffer output
    950  * @doc:  the document
    951  * @cur:  the current node
    952  * @encoding:  the encoding string
    953  *
    954  * Dump an HTML node, recursive behaviour,children are printed too,
    955  * and formatting returns/spaces are added.
    956  */
    957 void
    958 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    959 	           xmlNodePtr cur, const char *encoding) {
    960     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
    961 }
    962 
    963 /**
    964  * htmlDocContentDumpFormatOutput:
    965  * @buf:  the HTML buffer output
    966  * @cur:  the document
    967  * @encoding:  the encoding string
    968  * @format:  should formatting spaces been added
    969  *
    970  * Dump an HTML document.
    971  */
    972 void
    973 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
    974 	                       const char *encoding, int format) {
    975     int type;
    976 
    977     xmlInitParser();
    978 
    979     if ((buf == NULL) || (cur == NULL))
    980         return;
    981 
    982     /*
    983      * force to output the stuff as HTML, especially for entities
    984      */
    985     type = cur->type;
    986     cur->type = XML_HTML_DOCUMENT_NODE;
    987     if (cur->intSubset != NULL) {
    988         htmlDtdDumpOutput(buf, cur, NULL);
    989     }
    990     if (cur->children != NULL) {
    991         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
    992     }
    993     xmlOutputBufferWriteString(buf, "\n");
    994     cur->type = (xmlElementType) type;
    995 }
    996 
    997 /**
    998  * htmlDocContentDumpOutput:
    999  * @buf:  the HTML buffer output
   1000  * @cur:  the document
   1001  * @encoding:  the encoding string
   1002  *
   1003  * Dump an HTML document. Formating return/spaces are added.
   1004  */
   1005 void
   1006 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
   1007 	                 const char *encoding) {
   1008     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
   1009 }
   1010 
   1011 /************************************************************************
   1012  *									*
   1013  *		Saving functions front-ends				*
   1014  *									*
   1015  ************************************************************************/
   1016 
   1017 /**
   1018  * htmlDocDump:
   1019  * @f:  the FILE*
   1020  * @cur:  the document
   1021  *
   1022  * Dump an HTML document to an open FILE.
   1023  *
   1024  * returns: the number of byte written or -1 in case of failure.
   1025  */
   1026 int
   1027 htmlDocDump(FILE *f, xmlDocPtr cur) {
   1028     xmlOutputBufferPtr buf;
   1029     xmlCharEncodingHandlerPtr handler = NULL;
   1030     const char *encoding;
   1031     int ret;
   1032 
   1033     xmlInitParser();
   1034 
   1035     if ((cur == NULL) || (f == NULL)) {
   1036 	return(-1);
   1037     }
   1038 
   1039     encoding = (const char *) htmlGetMetaEncoding(cur);
   1040 
   1041     if (encoding != NULL) {
   1042 	xmlCharEncoding enc;
   1043 
   1044 	enc = xmlParseCharEncoding(encoding);
   1045 	if (enc != cur->charset) {
   1046 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1047 		/*
   1048 		 * Not supported yet
   1049 		 */
   1050 		return(-1);
   1051 	    }
   1052 
   1053 	    handler = xmlFindCharEncodingHandler(encoding);
   1054 	    if (handler == NULL)
   1055 		return(-1);
   1056 	} else {
   1057 	    handler = xmlFindCharEncodingHandler(encoding);
   1058 	}
   1059     }
   1060 
   1061     /*
   1062      * Fallback to HTML or ASCII when the encoding is unspecified
   1063      */
   1064     if (handler == NULL)
   1065 	handler = xmlFindCharEncodingHandler("HTML");
   1066     if (handler == NULL)
   1067 	handler = xmlFindCharEncodingHandler("ascii");
   1068 
   1069     buf = xmlOutputBufferCreateFile(f, handler);
   1070     if (buf == NULL) return(-1);
   1071     htmlDocContentDumpOutput(buf, cur, NULL);
   1072 
   1073     ret = xmlOutputBufferClose(buf);
   1074     return(ret);
   1075 }
   1076 
   1077 /**
   1078  * htmlSaveFile:
   1079  * @filename:  the filename (or URL)
   1080  * @cur:  the document
   1081  *
   1082  * Dump an HTML document to a file. If @filename is "-" the stdout file is
   1083  * used.
   1084  * returns: the number of byte written or -1 in case of failure.
   1085  */
   1086 int
   1087 htmlSaveFile(const char *filename, xmlDocPtr cur) {
   1088     xmlOutputBufferPtr buf;
   1089     xmlCharEncodingHandlerPtr handler = NULL;
   1090     const char *encoding;
   1091     int ret;
   1092 
   1093     if ((cur == NULL) || (filename == NULL))
   1094         return(-1);
   1095 
   1096     xmlInitParser();
   1097 
   1098     encoding = (const char *) htmlGetMetaEncoding(cur);
   1099 
   1100     if (encoding != NULL) {
   1101 	xmlCharEncoding enc;
   1102 
   1103 	enc = xmlParseCharEncoding(encoding);
   1104 	if (enc != cur->charset) {
   1105 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1106 		/*
   1107 		 * Not supported yet
   1108 		 */
   1109 		return(-1);
   1110 	    }
   1111 
   1112 	    handler = xmlFindCharEncodingHandler(encoding);
   1113 	    if (handler == NULL)
   1114 		return(-1);
   1115 	}
   1116     }
   1117 
   1118     /*
   1119      * Fallback to HTML or ASCII when the encoding is unspecified
   1120      */
   1121     if (handler == NULL)
   1122 	handler = xmlFindCharEncodingHandler("HTML");
   1123     if (handler == NULL)
   1124 	handler = xmlFindCharEncodingHandler("ascii");
   1125 
   1126     /*
   1127      * save the content to a temp buffer.
   1128      */
   1129     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
   1130     if (buf == NULL) return(0);
   1131 
   1132     htmlDocContentDumpOutput(buf, cur, NULL);
   1133 
   1134     ret = xmlOutputBufferClose(buf);
   1135     return(ret);
   1136 }
   1137 
   1138 /**
   1139  * htmlSaveFileFormat:
   1140  * @filename:  the filename
   1141  * @cur:  the document
   1142  * @format:  should formatting spaces been added
   1143  * @encoding: the document encoding
   1144  *
   1145  * Dump an HTML document to a file using a given encoding.
   1146  *
   1147  * returns: the number of byte written or -1 in case of failure.
   1148  */
   1149 int
   1150 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
   1151 	           const char *encoding, int format) {
   1152     xmlOutputBufferPtr buf;
   1153     xmlCharEncodingHandlerPtr handler = NULL;
   1154     int ret;
   1155 
   1156     if ((cur == NULL) || (filename == NULL))
   1157         return(-1);
   1158 
   1159     xmlInitParser();
   1160 
   1161     if (encoding != NULL) {
   1162 	xmlCharEncoding enc;
   1163 
   1164 	enc = xmlParseCharEncoding(encoding);
   1165 	if (enc != cur->charset) {
   1166 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1167 		/*
   1168 		 * Not supported yet
   1169 		 */
   1170 		return(-1);
   1171 	    }
   1172 
   1173 	    handler = xmlFindCharEncodingHandler(encoding);
   1174 	    if (handler == NULL)
   1175 		return(-1);
   1176             htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
   1177 	}
   1178     } else {
   1179 	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
   1180     }
   1181 
   1182     /*
   1183      * Fallback to HTML or ASCII when the encoding is unspecified
   1184      */
   1185     if (handler == NULL)
   1186 	handler = xmlFindCharEncodingHandler("HTML");
   1187     if (handler == NULL)
   1188 	handler = xmlFindCharEncodingHandler("ascii");
   1189 
   1190     /*
   1191      * save the content to a temp buffer.
   1192      */
   1193     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
   1194     if (buf == NULL) return(0);
   1195 
   1196     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
   1197 
   1198     ret = xmlOutputBufferClose(buf);
   1199     return(ret);
   1200 }
   1201 
   1202 /**
   1203  * htmlSaveFileEnc:
   1204  * @filename:  the filename
   1205  * @cur:  the document
   1206  * @encoding: the document encoding
   1207  *
   1208  * Dump an HTML document to a file using a given encoding
   1209  * and formatting returns/spaces are added.
   1210  *
   1211  * returns: the number of byte written or -1 in case of failure.
   1212  */
   1213 int
   1214 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
   1215     return(htmlSaveFileFormat(filename, cur, encoding, 1));
   1216 }
   1217 
   1218 #endif /* LIBXML_OUTPUT_ENABLED */
   1219 
   1220 #define bottom_HTMLtree
   1221 #include "elfgcchack.h"
   1222 #endif /* LIBXML_HTML_ENABLED */
   1223