Home | History | Annotate | Download | only in src
      1 /*
      2  * HTMLtree.c : implementation of access function for an HTML tree.
      3  *
      4  * See Copyright for the status of this software.
      5  *
      6  * daniel (at) veillard.com
      7  */
      8 
      9 
     10 #define IN_LIBXML
     11 #include "libxml.h"
     12 #ifdef LIBXML_HTML_ENABLED
     13 
     14 #include <string.h> /* for memset() only ! */
     15 
     16 #ifdef HAVE_CTYPE_H
     17 #include <ctype.h>
     18 #endif
     19 #ifdef HAVE_STDLIB_H
     20 #include <stdlib.h>
     21 #endif
     22 
     23 #include <libxml/xmlmemory.h>
     24 #include <libxml/HTMLparser.h>
     25 #include <libxml/HTMLtree.h>
     26 #include <libxml/entities.h>
     27 #include <libxml/valid.h>
     28 #include <libxml/xmlerror.h>
     29 #include <libxml/parserInternals.h>
     30 #include <libxml/globals.h>
     31 #include <libxml/uri.h>
     32 
     33 /************************************************************************
     34  *									*
     35  *   		Getting/Setting encoding meta tags			*
     36  *									*
     37  ************************************************************************/
     38 
     39 /**
     40  * htmlGetMetaEncoding:
     41  * @doc:  the document
     42  *
     43  * Encoding definition lookup in the Meta tags
     44  *
     45  * Returns the current encoding as flagged in the HTML source
     46  */
     47 const xmlChar *
     48 htmlGetMetaEncoding(htmlDocPtr doc) {
     49     htmlNodePtr cur;
     50     const xmlChar *content;
     51     const xmlChar *encoding;
     52 
     53     if (doc == NULL)
     54 	return(NULL);
     55     cur = doc->children;
     56 
     57     /*
     58      * Search the html
     59      */
     60     while (cur != NULL) {
     61 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     62 	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
     63 		break;
     64 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
     65 		goto found_head;
     66 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
     67 		goto found_meta;
     68 	}
     69 	cur = cur->next;
     70     }
     71     if (cur == NULL)
     72 	return(NULL);
     73     cur = cur->children;
     74 
     75     /*
     76      * Search the head
     77      */
     78     while (cur != NULL) {
     79 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     80 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
     81 		break;
     82 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
     83 		goto found_meta;
     84 	}
     85 	cur = cur->next;
     86     }
     87     if (cur == NULL)
     88 	return(NULL);
     89 found_head:
     90     cur = cur->children;
     91 
     92     /*
     93      * Search the meta elements
     94      */
     95 found_meta:
     96     while (cur != NULL) {
     97 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
     98 	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
     99 		xmlAttrPtr attr = cur->properties;
    100 		int http;
    101 		const xmlChar *value;
    102 
    103 		content = NULL;
    104 		http = 0;
    105 		while (attr != NULL) {
    106 		    if ((attr->children != NULL) &&
    107 		        (attr->children->type == XML_TEXT_NODE) &&
    108 		        (attr->children->next == NULL)) {
    109 			value = attr->children->content;
    110 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
    111 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
    112 			    http = 1;
    113 			else if ((value != NULL)
    114 			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
    115 			    content = value;
    116 			if ((http != 0) && (content != NULL))
    117 			    goto found_content;
    118 		    }
    119 		    attr = attr->next;
    120 		}
    121 	    }
    122 	}
    123 	cur = cur->next;
    124     }
    125     return(NULL);
    126 
    127 found_content:
    128     encoding = xmlStrstr(content, BAD_CAST"charset=");
    129     if (encoding == NULL)
    130 	encoding = xmlStrstr(content, BAD_CAST"Charset=");
    131     if (encoding == NULL)
    132 	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
    133     if (encoding != NULL) {
    134 	encoding += 8;
    135     } else {
    136 	encoding = xmlStrstr(content, BAD_CAST"charset =");
    137 	if (encoding == NULL)
    138 	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
    139 	if (encoding == NULL)
    140 	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
    141 	if (encoding != NULL)
    142 	    encoding += 9;
    143     }
    144     if (encoding != NULL) {
    145 	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
    146     }
    147     return(encoding);
    148 }
    149 
    150 /**
    151  * htmlSetMetaEncoding:
    152  * @doc:  the document
    153  * @encoding:  the encoding string
    154  *
    155  * Sets the current encoding in the Meta tags
    156  * NOTE: this will not change the document content encoding, just
    157  * the META flag associated.
    158  *
    159  * Returns 0 in case of success and -1 in case of error
    160  */
    161 int
    162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
    163     htmlNodePtr cur, meta = NULL, head = NULL;
    164     const xmlChar *content = NULL;
    165     char newcontent[100];
    166 
    167 
    168     if (doc == NULL)
    169 	return(-1);
    170 
    171     /* html isn't a real encoding it's just libxml2 way to get entities */
    172     if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
    173         return(-1);
    174 
    175     if (encoding != NULL) {
    176 	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
    177                 (char *)encoding);
    178 	newcontent[sizeof(newcontent) - 1] = 0;
    179     }
    180 
    181     cur = doc->children;
    182 
    183     /*
    184      * Search the html
    185      */
    186     while (cur != NULL) {
    187 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    188 	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
    189 		break;
    190 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
    191 		goto found_head;
    192 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
    193 		goto found_meta;
    194 	}
    195 	cur = cur->next;
    196     }
    197     if (cur == NULL)
    198 	return(-1);
    199     cur = cur->children;
    200 
    201     /*
    202      * Search the head
    203      */
    204     while (cur != NULL) {
    205 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    206 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
    207 		break;
    208 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
    209                 head = cur->parent;
    210 		goto found_meta;
    211             }
    212 	}
    213 	cur = cur->next;
    214     }
    215     if (cur == NULL)
    216 	return(-1);
    217 found_head:
    218     head = cur;
    219     if (cur->children == NULL)
    220         goto create;
    221     cur = cur->children;
    222 
    223 found_meta:
    224     /*
    225      * Search and update all the remaining the meta elements carrying
    226      * encoding informations
    227      */
    228     while (cur != NULL) {
    229 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
    230 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
    231 		xmlAttrPtr attr = cur->properties;
    232 		int http;
    233 		const xmlChar *value;
    234 
    235 		content = NULL;
    236 		http = 0;
    237 		while (attr != NULL) {
    238 		    if ((attr->children != NULL) &&
    239 		        (attr->children->type == XML_TEXT_NODE) &&
    240 		        (attr->children->next == NULL)) {
    241 			value = attr->children->content;
    242 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
    243 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
    244 			    http = 1;
    245 			else
    246                         {
    247                            if ((value != NULL) &&
    248                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
    249 			       content = value;
    250                         }
    251 		        if ((http != 0) && (content != NULL))
    252 			    break;
    253 		    }
    254 		    attr = attr->next;
    255 		}
    256 		if ((http != 0) && (content != NULL)) {
    257 		    meta = cur;
    258 		    break;
    259 		}
    260 
    261 	    }
    262 	}
    263 	cur = cur->next;
    264     }
    265 create:
    266     if (meta == NULL) {
    267         if ((encoding != NULL) && (head != NULL)) {
    268             /*
    269              * Create a new Meta element with the right attributes
    270              */
    271 
    272             meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
    273             if (head->children == NULL)
    274                 xmlAddChild(head, meta);
    275             else
    276                 xmlAddPrevSibling(head->children, meta);
    277             xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
    278             xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
    279         }
    280     } else {
    281         /* change the document only if there is a real encoding change */
    282         if (xmlStrcasestr(content, encoding) == NULL) {
    283             xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
    284         }
    285     }
    286 
    287 
    288     return(0);
    289 }
    290 
    291 /**
    292  * booleanHTMLAttrs:
    293  *
    294  * These are the HTML attributes which will be output
    295  * in minimized form, i.e. <option selected="selected"> will be
    296  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
    297  *
    298  */
    299 static const char* htmlBooleanAttrs[] = {
    300   "checked", "compact", "declare", "defer", "disabled", "ismap",
    301   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    302   "selected", NULL
    303 };
    304 
    305 
    306 /**
    307  * htmlIsBooleanAttr:
    308  * @name:  the name of the attribute to check
    309  *
    310  * Determine if a given attribute is a boolean attribute.
    311  *
    312  * returns: false if the attribute is not boolean, true otherwise.
    313  */
    314 int
    315 htmlIsBooleanAttr(const xmlChar *name)
    316 {
    317     int i = 0;
    318 
    319     while (htmlBooleanAttrs[i] != NULL) {
    320         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
    321             return 1;
    322         i++;
    323     }
    324     return 0;
    325 }
    326 
    327 #ifdef LIBXML_OUTPUT_ENABLED
    328 /*
    329  * private routine exported from xmlIO.c
    330  */
    331 xmlOutputBufferPtr
    332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
    333 /************************************************************************
    334  *									*
    335  * 			Output error handlers				*
    336  *									*
    337  ************************************************************************/
    338 /**
    339  * htmlSaveErrMemory:
    340  * @extra:  extra informations
    341  *
    342  * Handle an out of memory condition
    343  */
    344 static void
    345 htmlSaveErrMemory(const char *extra)
    346 {
    347     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
    348 }
    349 
    350 /**
    351  * htmlSaveErr:
    352  * @code:  the error number
    353  * @node:  the location of the error.
    354  * @extra:  extra informations
    355  *
    356  * Handle an out of memory condition
    357  */
    358 static void
    359 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
    360 {
    361     const char *msg = NULL;
    362 
    363     switch(code) {
    364         case XML_SAVE_NOT_UTF8:
    365 	    msg = "string is not in UTF-8\n";
    366 	    break;
    367 	case XML_SAVE_CHAR_INVALID:
    368 	    msg = "invalid character value\n";
    369 	    break;
    370 	case XML_SAVE_UNKNOWN_ENCODING:
    371 	    msg = "unknown encoding %s\n";
    372 	    break;
    373 	case XML_SAVE_NO_DOCTYPE:
    374 	    msg = "HTML has no DOCTYPE\n";
    375 	    break;
    376 	default:
    377 	    msg = "unexpected error number\n";
    378     }
    379     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
    380 }
    381 
    382 /************************************************************************
    383  *									*
    384  *   		Dumping HTML tree content to a simple buffer		*
    385  *									*
    386  ************************************************************************/
    387 
    388 static int
    389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
    390 	           int format);
    391 
    392 /**
    393  * htmlNodeDumpFormat:
    394  * @buf:  the HTML buffer output
    395  * @doc:  the document
    396  * @cur:  the current node
    397  * @format:  should formatting spaces been added
    398  *
    399  * Dump an HTML node, recursive behaviour,children are printed too.
    400  *
    401  * Returns the number of byte written or -1 in case of error
    402  */
    403 static int
    404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
    405 	           int format) {
    406     unsigned int use;
    407     int ret;
    408     xmlOutputBufferPtr outbuf;
    409 
    410     if (cur == NULL) {
    411 	return (-1);
    412     }
    413     if (buf == NULL) {
    414 	return (-1);
    415     }
    416     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
    417     if (outbuf == NULL) {
    418         htmlSaveErrMemory("allocating HTML output buffer");
    419 	return (-1);
    420     }
    421     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
    422     outbuf->buffer = buf;
    423     outbuf->encoder = NULL;
    424     outbuf->writecallback = NULL;
    425     outbuf->closecallback = NULL;
    426     outbuf->context = NULL;
    427     outbuf->written = 0;
    428 
    429     use = buf->use;
    430     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
    431     xmlFree(outbuf);
    432     ret = buf->use - use;
    433     return (ret);
    434 }
    435 
    436 /**
    437  * htmlNodeDump:
    438  * @buf:  the HTML buffer output
    439  * @doc:  the document
    440  * @cur:  the current node
    441  *
    442  * Dump an HTML node, recursive behaviour,children are printed too,
    443  * and formatting returns are added.
    444  *
    445  * Returns the number of byte written or -1 in case of error
    446  */
    447 int
    448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
    449     xmlInitParser();
    450 
    451     return(htmlNodeDumpFormat(buf, doc, cur, 1));
    452 }
    453 
    454 /**
    455  * htmlNodeDumpFileFormat:
    456  * @out:  the FILE pointer
    457  * @doc:  the document
    458  * @cur:  the current node
    459  * @encoding: the document encoding
    460  * @format:  should formatting spaces been added
    461  *
    462  * Dump an HTML node, recursive behaviour,children are printed too.
    463  *
    464  * TODO: if encoding == NULL try to save in the doc encoding
    465  *
    466  * returns: the number of byte written or -1 in case of failure.
    467  */
    468 int
    469 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
    470 	               xmlNodePtr cur, const char *encoding, int format) {
    471     xmlOutputBufferPtr buf;
    472     xmlCharEncodingHandlerPtr handler = NULL;
    473     int ret;
    474 
    475     xmlInitParser();
    476 
    477     if (encoding != NULL) {
    478 	xmlCharEncoding enc;
    479 
    480 	enc = xmlParseCharEncoding(encoding);
    481 	if (enc != XML_CHAR_ENCODING_UTF8) {
    482 	    handler = xmlFindCharEncodingHandler(encoding);
    483 	    if (handler == NULL)
    484 		return(-1);
    485 	}
    486     }
    487 
    488     /*
    489      * Fallback to HTML or ASCII when the encoding is unspecified
    490      */
    491     if (handler == NULL)
    492 	handler = xmlFindCharEncodingHandler("HTML");
    493     if (handler == NULL)
    494 	handler = xmlFindCharEncodingHandler("ascii");
    495 
    496     /*
    497      * save the content to a temp buffer.
    498      */
    499     buf = xmlOutputBufferCreateFile(out, handler);
    500     if (buf == NULL) return(0);
    501 
    502     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
    503 
    504     ret = xmlOutputBufferClose(buf);
    505     return(ret);
    506 }
    507 
    508 /**
    509  * htmlNodeDumpFile:
    510  * @out:  the FILE pointer
    511  * @doc:  the document
    512  * @cur:  the current node
    513  *
    514  * Dump an HTML node, recursive behaviour,children are printed too,
    515  * and formatting returns are added.
    516  */
    517 void
    518 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
    519     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
    520 }
    521 
    522 /**
    523  * htmlDocDumpMemoryFormat:
    524  * @cur:  the document
    525  * @mem:  OUT: the memory pointer
    526  * @size:  OUT: the memory length
    527  * @format:  should formatting spaces been added
    528  *
    529  * Dump an HTML document in memory and return the xmlChar * and it's size.
    530  * It's up to the caller to free the memory.
    531  */
    532 void
    533 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
    534     xmlOutputBufferPtr buf;
    535     xmlCharEncodingHandlerPtr handler = NULL;
    536     const char *encoding;
    537 
    538     xmlInitParser();
    539 
    540     if ((mem == NULL) || (size == NULL))
    541         return;
    542     if (cur == NULL) {
    543 	*mem = NULL;
    544 	*size = 0;
    545 	return;
    546     }
    547 
    548     encoding = (const char *) htmlGetMetaEncoding(cur);
    549 
    550     if (encoding != NULL) {
    551 	xmlCharEncoding enc;
    552 
    553 	enc = xmlParseCharEncoding(encoding);
    554 	if (enc != cur->charset) {
    555 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
    556 		/*
    557 		 * Not supported yet
    558 		 */
    559 		*mem = NULL;
    560 		*size = 0;
    561 		return;
    562 	    }
    563 
    564 	    handler = xmlFindCharEncodingHandler(encoding);
    565 	    if (handler == NULL) {
    566 		*mem = NULL;
    567 		*size = 0;
    568 		return;
    569 	    }
    570 	} else {
    571 	    handler = xmlFindCharEncodingHandler(encoding);
    572 	}
    573     }
    574 
    575     /*
    576      * Fallback to HTML or ASCII when the encoding is unspecified
    577      */
    578     if (handler == NULL)
    579 	handler = xmlFindCharEncodingHandler("HTML");
    580     if (handler == NULL)
    581 	handler = xmlFindCharEncodingHandler("ascii");
    582 
    583     buf = xmlAllocOutputBufferInternal(handler);
    584     if (buf == NULL) {
    585 	*mem = NULL;
    586 	*size = 0;
    587 	return;
    588     }
    589 
    590 	htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
    591 
    592     xmlOutputBufferFlush(buf);
    593     if (buf->conv != NULL) {
    594 	*size = buf->conv->use;
    595 	*mem = xmlStrndup(buf->conv->content, *size);
    596     } else {
    597 	*size = buf->buffer->use;
    598 	*mem = xmlStrndup(buf->buffer->content, *size);
    599     }
    600     (void)xmlOutputBufferClose(buf);
    601 }
    602 
    603 /**
    604  * htmlDocDumpMemory:
    605  * @cur:  the document
    606  * @mem:  OUT: the memory pointer
    607  * @size:  OUT: the memory length
    608  *
    609  * Dump an HTML document in memory and return the xmlChar * and it's size.
    610  * It's up to the caller to free the memory.
    611  */
    612 void
    613 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
    614 	htmlDocDumpMemoryFormat(cur, mem, size, 1);
    615 }
    616 
    617 
    618 /************************************************************************
    619  *									*
    620  *   		Dumping HTML tree content to an I/O output buffer	*
    621  *									*
    622  ************************************************************************/
    623 
    624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
    625 
    626 /**
    627  * htmlDtdDumpOutput:
    628  * @buf:  the HTML buffer output
    629  * @doc:  the document
    630  * @encoding:  the encoding string
    631  *
    632  * TODO: check whether encoding is needed
    633  *
    634  * Dump the HTML document DTD, if any.
    635  */
    636 static void
    637 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    638 	          const char *encoding ATTRIBUTE_UNUSED) {
    639     xmlDtdPtr cur = doc->intSubset;
    640 
    641     if (cur == NULL) {
    642 	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
    643 	return;
    644     }
    645     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
    646     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    647     if (cur->ExternalID != NULL) {
    648 	xmlOutputBufferWriteString(buf, " PUBLIC ");
    649 	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
    650 	if (cur->SystemID != NULL) {
    651 	    xmlOutputBufferWriteString(buf, " ");
    652 	    xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
    653 	}
    654     }  else if (cur->SystemID != NULL) {
    655 	xmlOutputBufferWriteString(buf, " SYSTEM ");
    656 	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
    657     }
    658     xmlOutputBufferWriteString(buf, ">\n");
    659 }
    660 
    661 /**
    662  * htmlAttrDumpOutput:
    663  * @buf:  the HTML buffer output
    664  * @doc:  the document
    665  * @cur:  the attribute pointer
    666  * @encoding:  the encoding string
    667  *
    668  * Dump an HTML attribute
    669  */
    670 static void
    671 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
    672 	           const char *encoding ATTRIBUTE_UNUSED) {
    673     xmlChar *value;
    674 
    675     /*
    676      * TODO: The html output method should not escape a & character
    677      *       occurring in an attribute value immediately followed by
    678      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
    679      */
    680 
    681     if (cur == NULL) {
    682 	return;
    683     }
    684     xmlOutputBufferWriteString(buf, " ");
    685     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    686         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    687 	xmlOutputBufferWriteString(buf, ":");
    688     }
    689     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    690     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
    691 	value = xmlNodeListGetString(doc, cur->children, 0);
    692 	if (value) {
    693 	    xmlOutputBufferWriteString(buf, "=");
    694 	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
    695 		(cur->parent->ns == NULL) &&
    696 		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
    697 	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
    698 		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
    699 		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
    700 		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
    701 		xmlChar *escaped;
    702 		xmlChar *tmp = value;
    703 
    704 		while (IS_BLANK_CH(*tmp)) tmp++;
    705 
    706 		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
    707 		if (escaped != NULL) {
    708 		    xmlBufferWriteQuotedString(buf->buffer, escaped);
    709 		    xmlFree(escaped);
    710 		} else {
    711 		    xmlBufferWriteQuotedString(buf->buffer, value);
    712 		}
    713 	    } else {
    714 		xmlBufferWriteQuotedString(buf->buffer, value);
    715 	    }
    716 	    xmlFree(value);
    717 	} else  {
    718 	    xmlOutputBufferWriteString(buf, "=\"\"");
    719 	}
    720     }
    721 }
    722 
    723 /**
    724  * htmlAttrListDumpOutput:
    725  * @buf:  the HTML buffer output
    726  * @doc:  the document
    727  * @cur:  the first attribute pointer
    728  * @encoding:  the encoding string
    729  *
    730  * Dump a list of HTML attributes
    731  */
    732 static void
    733 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
    734     if (cur == NULL) {
    735 	return;
    736     }
    737     while (cur != NULL) {
    738         htmlAttrDumpOutput(buf, doc, cur, encoding);
    739 	cur = cur->next;
    740     }
    741 }
    742 
    743 
    744 
    745 /**
    746  * htmlNodeListDumpOutput:
    747  * @buf:  the HTML buffer output
    748  * @doc:  the document
    749  * @cur:  the first node
    750  * @encoding:  the encoding string
    751  * @format:  should formatting spaces been added
    752  *
    753  * Dump an HTML node list, recursive behaviour,children are printed too.
    754  */
    755 static void
    756 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    757 	               xmlNodePtr cur, const char *encoding, int format) {
    758     if (cur == NULL) {
    759 	return;
    760     }
    761     while (cur != NULL) {
    762         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
    763 	cur = cur->next;
    764     }
    765 }
    766 
    767 /**
    768  * htmlNodeDumpFormatOutput:
    769  * @buf:  the HTML buffer output
    770  * @doc:  the document
    771  * @cur:  the current node
    772  * @encoding:  the encoding string
    773  * @format:  should formatting spaces been added
    774  *
    775  * Dump an HTML node, recursive behaviour,children are printed too.
    776  */
    777 void
    778 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    779 	                 xmlNodePtr cur, const char *encoding, int format) {
    780     const htmlElemDesc * info;
    781 
    782     xmlInitParser();
    783 
    784     if ((cur == NULL) || (buf == NULL)) {
    785 	return;
    786     }
    787     /*
    788      * Special cases.
    789      */
    790     if (cur->type == XML_DTD_NODE)
    791 	return;
    792     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
    793         (cur->type == XML_DOCUMENT_NODE)){
    794 	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
    795 	return;
    796     }
    797     if (cur->type == XML_ATTRIBUTE_NODE) {
    798         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
    799 	return;
    800     }
    801     if (cur->type == HTML_TEXT_NODE) {
    802 	if (cur->content != NULL) {
    803 	    if (((cur->name == (const xmlChar *)xmlStringText) ||
    804 		 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
    805 		((cur->parent == NULL) ||
    806 		 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
    807 		  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
    808 		xmlChar *buffer;
    809 
    810 		buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
    811 		if (buffer != NULL) {
    812 		    xmlOutputBufferWriteString(buf, (const char *)buffer);
    813 		    xmlFree(buffer);
    814 		}
    815 	    } else {
    816 		xmlOutputBufferWriteString(buf, (const char *)cur->content);
    817 	    }
    818 	}
    819 	return;
    820     }
    821     if (cur->type == HTML_COMMENT_NODE) {
    822 	if (cur->content != NULL) {
    823 	    xmlOutputBufferWriteString(buf, "<!--");
    824 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    825 	    xmlOutputBufferWriteString(buf, "-->");
    826 	}
    827 	return;
    828     }
    829     if (cur->type == HTML_PI_NODE) {
    830 	if (cur->name == NULL)
    831 	    return;
    832 	xmlOutputBufferWriteString(buf, "<?");
    833 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
    834 	if (cur->content != NULL) {
    835 	    xmlOutputBufferWriteString(buf, " ");
    836 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    837 	}
    838 	xmlOutputBufferWriteString(buf, ">");
    839 	return;
    840     }
    841     if (cur->type == HTML_ENTITY_REF_NODE) {
    842         xmlOutputBufferWriteString(buf, "&");
    843 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
    844         xmlOutputBufferWriteString(buf, ";");
    845 	return;
    846     }
    847     if (cur->type == HTML_PRESERVE_NODE) {
    848 	if (cur->content != NULL) {
    849 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
    850 	}
    851 	return;
    852     }
    853 
    854     /*
    855      * Get specific HTML info for that node.
    856      */
    857     if (cur->ns == NULL)
    858 	info = htmlTagLookup(cur->name);
    859     else
    860 	info = NULL;
    861 
    862     xmlOutputBufferWriteString(buf, "<");
    863     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    864         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    865 	xmlOutputBufferWriteString(buf, ":");
    866     }
    867     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    868     if (cur->nsDef)
    869 	xmlNsListDumpOutput(buf, cur->nsDef);
    870     if (cur->properties != NULL)
    871         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
    872 
    873     if ((info != NULL) && (info->empty)) {
    874         xmlOutputBufferWriteString(buf, ">");
    875 	if ((format) && (!info->isinline) && (cur->next != NULL)) {
    876 	    if ((cur->next->type != HTML_TEXT_NODE) &&
    877 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
    878 		(cur->parent != NULL) &&
    879 		(cur->parent->name != NULL) &&
    880 		(cur->parent->name[0] != 'p')) /* p, pre, param */
    881 		xmlOutputBufferWriteString(buf, "\n");
    882 	}
    883 	return;
    884     }
    885     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
    886 	(cur->children == NULL)) {
    887         if ((info != NULL) && (info->saveEndTag != 0) &&
    888 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
    889 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
    890 	    xmlOutputBufferWriteString(buf, ">");
    891 	} else {
    892 	    xmlOutputBufferWriteString(buf, "></");
    893             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    894                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    895                 xmlOutputBufferWriteString(buf, ":");
    896             }
    897 	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
    898 	    xmlOutputBufferWriteString(buf, ">");
    899 	}
    900 	if ((format) && (cur->next != NULL) &&
    901             (info != NULL) && (!info->isinline)) {
    902 	    if ((cur->next->type != HTML_TEXT_NODE) &&
    903 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
    904 		(cur->parent != NULL) &&
    905 		(cur->parent->name != NULL) &&
    906 		(cur->parent->name[0] != 'p')) /* p, pre, param */
    907 		xmlOutputBufferWriteString(buf, "\n");
    908 	}
    909 	return;
    910     }
    911     xmlOutputBufferWriteString(buf, ">");
    912     if ((cur->type != XML_ELEMENT_NODE) &&
    913 	(cur->content != NULL)) {
    914 	    /*
    915 	     * Uses the OutputBuffer property to automatically convert
    916 	     * invalids to charrefs
    917 	     */
    918 
    919             xmlOutputBufferWriteString(buf, (const char *) cur->content);
    920     }
    921     if (cur->children != NULL) {
    922         if ((format) && (info != NULL) && (!info->isinline) &&
    923 	    (cur->children->type != HTML_TEXT_NODE) &&
    924 	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
    925 	    (cur->children != cur->last) &&
    926 	    (cur->name != NULL) &&
    927 	    (cur->name[0] != 'p')) /* p, pre, param */
    928 	    xmlOutputBufferWriteString(buf, "\n");
    929 	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
    930         if ((format) && (info != NULL) && (!info->isinline) &&
    931 	    (cur->last->type != HTML_TEXT_NODE) &&
    932 	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
    933 	    (cur->children != cur->last) &&
    934 	    (cur->name != NULL) &&
    935 	    (cur->name[0] != 'p')) /* p, pre, param */
    936 	    xmlOutputBufferWriteString(buf, "\n");
    937     }
    938     xmlOutputBufferWriteString(buf, "</");
    939     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
    940         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
    941 	xmlOutputBufferWriteString(buf, ":");
    942     }
    943     xmlOutputBufferWriteString(buf, (const char *)cur->name);
    944     xmlOutputBufferWriteString(buf, ">");
    945     if ((format) && (info != NULL) && (!info->isinline) &&
    946 	(cur->next != NULL)) {
    947         if ((cur->next->type != HTML_TEXT_NODE) &&
    948 	    (cur->next->type != HTML_ENTITY_REF_NODE) &&
    949 	    (cur->parent != NULL) &&
    950 	    (cur->parent->name != NULL) &&
    951 	    (cur->parent->name[0] != 'p')) /* p, pre, param */
    952 	    xmlOutputBufferWriteString(buf, "\n");
    953     }
    954 }
    955 
    956 /**
    957  * htmlNodeDumpOutput:
    958  * @buf:  the HTML buffer output
    959  * @doc:  the document
    960  * @cur:  the current node
    961  * @encoding:  the encoding string
    962  *
    963  * Dump an HTML node, recursive behaviour,children are printed too,
    964  * and formatting returns/spaces are added.
    965  */
    966 void
    967 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
    968 	           xmlNodePtr cur, const char *encoding) {
    969     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
    970 }
    971 
    972 /**
    973  * htmlDocContentDumpFormatOutput:
    974  * @buf:  the HTML buffer output
    975  * @cur:  the document
    976  * @encoding:  the encoding string
    977  * @format:  should formatting spaces been added
    978  *
    979  * Dump an HTML document.
    980  */
    981 void
    982 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
    983 	                       const char *encoding, int format) {
    984     int type;
    985 
    986     xmlInitParser();
    987 
    988     if ((buf == NULL) || (cur == NULL))
    989         return;
    990 
    991     /*
    992      * force to output the stuff as HTML, especially for entities
    993      */
    994     type = cur->type;
    995     cur->type = XML_HTML_DOCUMENT_NODE;
    996     if (cur->intSubset != NULL) {
    997         htmlDtdDumpOutput(buf, cur, NULL);
    998     }
    999     if (cur->children != NULL) {
   1000         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
   1001     }
   1002     xmlOutputBufferWriteString(buf, "\n");
   1003     cur->type = (xmlElementType) type;
   1004 }
   1005 
   1006 /**
   1007  * htmlDocContentDumpOutput:
   1008  * @buf:  the HTML buffer output
   1009  * @cur:  the document
   1010  * @encoding:  the encoding string
   1011  *
   1012  * Dump an HTML document. Formating return/spaces are added.
   1013  */
   1014 void
   1015 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
   1016 	                 const char *encoding) {
   1017     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
   1018 }
   1019 
   1020 /************************************************************************
   1021  *									*
   1022  *		Saving functions front-ends				*
   1023  *									*
   1024  ************************************************************************/
   1025 
   1026 /**
   1027  * htmlDocDump:
   1028  * @f:  the FILE*
   1029  * @cur:  the document
   1030  *
   1031  * Dump an HTML document to an open FILE.
   1032  *
   1033  * returns: the number of byte written or -1 in case of failure.
   1034  */
   1035 int
   1036 htmlDocDump(FILE *f, xmlDocPtr cur) {
   1037     xmlOutputBufferPtr buf;
   1038     xmlCharEncodingHandlerPtr handler = NULL;
   1039     const char *encoding;
   1040     int ret;
   1041 
   1042     xmlInitParser();
   1043 
   1044     if ((cur == NULL) || (f == NULL)) {
   1045 	return(-1);
   1046     }
   1047 
   1048     encoding = (const char *) htmlGetMetaEncoding(cur);
   1049 
   1050     if (encoding != NULL) {
   1051 	xmlCharEncoding enc;
   1052 
   1053 	enc = xmlParseCharEncoding(encoding);
   1054 	if (enc != cur->charset) {
   1055 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1056 		/*
   1057 		 * Not supported yet
   1058 		 */
   1059 		return(-1);
   1060 	    }
   1061 
   1062 	    handler = xmlFindCharEncodingHandler(encoding);
   1063 	    if (handler == NULL)
   1064 		return(-1);
   1065 	} else {
   1066 	    handler = xmlFindCharEncodingHandler(encoding);
   1067 	}
   1068     }
   1069 
   1070     /*
   1071      * Fallback to HTML or ASCII when the encoding is unspecified
   1072      */
   1073     if (handler == NULL)
   1074 	handler = xmlFindCharEncodingHandler("HTML");
   1075     if (handler == NULL)
   1076 	handler = xmlFindCharEncodingHandler("ascii");
   1077 
   1078     buf = xmlOutputBufferCreateFile(f, handler);
   1079     if (buf == NULL) return(-1);
   1080     htmlDocContentDumpOutput(buf, cur, NULL);
   1081 
   1082     ret = xmlOutputBufferClose(buf);
   1083     return(ret);
   1084 }
   1085 
   1086 /**
   1087  * htmlSaveFile:
   1088  * @filename:  the filename (or URL)
   1089  * @cur:  the document
   1090  *
   1091  * Dump an HTML document to a file. If @filename is "-" the stdout file is
   1092  * used.
   1093  * returns: the number of byte written or -1 in case of failure.
   1094  */
   1095 int
   1096 htmlSaveFile(const char *filename, xmlDocPtr cur) {
   1097     xmlOutputBufferPtr buf;
   1098     xmlCharEncodingHandlerPtr handler = NULL;
   1099     const char *encoding;
   1100     int ret;
   1101 
   1102     if ((cur == NULL) || (filename == NULL))
   1103         return(-1);
   1104 
   1105     xmlInitParser();
   1106 
   1107     encoding = (const char *) htmlGetMetaEncoding(cur);
   1108 
   1109     if (encoding != NULL) {
   1110 	xmlCharEncoding enc;
   1111 
   1112 	enc = xmlParseCharEncoding(encoding);
   1113 	if (enc != cur->charset) {
   1114 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1115 		/*
   1116 		 * Not supported yet
   1117 		 */
   1118 		return(-1);
   1119 	    }
   1120 
   1121 	    handler = xmlFindCharEncodingHandler(encoding);
   1122 	    if (handler == NULL)
   1123 		return(-1);
   1124 	}
   1125     }
   1126 
   1127     /*
   1128      * Fallback to HTML or ASCII when the encoding is unspecified
   1129      */
   1130     if (handler == NULL)
   1131 	handler = xmlFindCharEncodingHandler("HTML");
   1132     if (handler == NULL)
   1133 	handler = xmlFindCharEncodingHandler("ascii");
   1134 
   1135     /*
   1136      * save the content to a temp buffer.
   1137      */
   1138     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
   1139     if (buf == NULL) return(0);
   1140 
   1141     htmlDocContentDumpOutput(buf, cur, NULL);
   1142 
   1143     ret = xmlOutputBufferClose(buf);
   1144     return(ret);
   1145 }
   1146 
   1147 /**
   1148  * htmlSaveFileFormat:
   1149  * @filename:  the filename
   1150  * @cur:  the document
   1151  * @format:  should formatting spaces been added
   1152  * @encoding: the document encoding
   1153  *
   1154  * Dump an HTML document to a file using a given encoding.
   1155  *
   1156  * returns: the number of byte written or -1 in case of failure.
   1157  */
   1158 int
   1159 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
   1160 	           const char *encoding, int format) {
   1161     xmlOutputBufferPtr buf;
   1162     xmlCharEncodingHandlerPtr handler = NULL;
   1163     int ret;
   1164 
   1165     if ((cur == NULL) || (filename == NULL))
   1166         return(-1);
   1167 
   1168     xmlInitParser();
   1169 
   1170     if (encoding != NULL) {
   1171 	xmlCharEncoding enc;
   1172 
   1173 	enc = xmlParseCharEncoding(encoding);
   1174 	if (enc != cur->charset) {
   1175 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
   1176 		/*
   1177 		 * Not supported yet
   1178 		 */
   1179 		return(-1);
   1180 	    }
   1181 
   1182 	    handler = xmlFindCharEncodingHandler(encoding);
   1183 	    if (handler == NULL)
   1184 		return(-1);
   1185 	}
   1186         htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
   1187     } else {
   1188 	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
   1189     }
   1190 
   1191     /*
   1192      * Fallback to HTML or ASCII when the encoding is unspecified
   1193      */
   1194     if (handler == NULL)
   1195 	handler = xmlFindCharEncodingHandler("HTML");
   1196     if (handler == NULL)
   1197 	handler = xmlFindCharEncodingHandler("ascii");
   1198 
   1199     /*
   1200      * save the content to a temp buffer.
   1201      */
   1202     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
   1203     if (buf == NULL) return(0);
   1204 
   1205     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
   1206 
   1207     ret = xmlOutputBufferClose(buf);
   1208     return(ret);
   1209 }
   1210 
   1211 /**
   1212  * htmlSaveFileEnc:
   1213  * @filename:  the filename
   1214  * @cur:  the document
   1215  * @encoding: the document encoding
   1216  *
   1217  * Dump an HTML document to a file using a given encoding
   1218  * and formatting returns/spaces are added.
   1219  *
   1220  * returns: the number of byte written or -1 in case of failure.
   1221  */
   1222 int
   1223 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
   1224     return(htmlSaveFileFormat(filename, cur, encoding, 1));
   1225 }
   1226 
   1227 #endif /* LIBXML_OUTPUT_ENABLED */
   1228 
   1229 #define bottom_HTMLtree
   1230 #include "elfgcchack.h"
   1231 #endif /* LIBXML_HTML_ENABLED */
   1232