Home | History | Annotate | Download | only in libxml2

Lines Matching refs:ctxt

56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
58 static void htmlParseComment(htmlParserCtxtPtr ctxt);
68 * @ctxt: an HTML parser context
74 htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
76 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
77 (ctxt->instate == XML_PARSER_EOF))
79 if (ctxt != NULL) {
80 ctxt->errNo = XML_ERR_NO_MEMORY;
81 ctxt->instate = XML_PARSER_EOF;
82 ctxt->disableSAX = 1;
85 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
90 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
97 * @ctxt: an HTML parser context
106 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
109 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
110 (ctxt->instate == XML_PARSER_EOF))
112 if (ctxt != NULL)
113 ctxt->errNo = error;
114 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
119 if (ctxt != NULL)
120 ctxt->wellFormed = 0;
125 * @ctxt: an HTML parser context
133 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
136 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
137 (ctxt->instate == XML_PARSER_EOF))
139 if (ctxt != NULL)
140 ctxt->errNo = error;
141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
144 if (ctxt != NULL)
145 ctxt->wellFormed = 0;
156 * @ctxt: an HTML parser context
164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
166 if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))
167 ctxt->html = 3;
168 if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))
169 ctxt->html = 10;
170 if (ctxt->nameNr >= ctxt->nameMax) {
171 ctxt->nameMax *= 2;
172 ctxt->nameTab = (const xmlChar * *)
173 xmlRealloc((xmlChar * *)ctxt->nameTab,
174 ctxt->nameMax *
175 sizeof(ctxt->nameTab[0]));
176 if (ctxt->nameTab == NULL) {
177 htmlErrMemory(ctxt, NULL);
181 ctxt->nameTab[ctxt->nameNr] = value;
182 ctxt->name = value;
183 return (ctxt->nameNr++);
187 * @ctxt: an HTML parser context
194 htmlnamePop(htmlParserCtxtPtr ctxt)
198 if (ctxt->nameNr <= 0)
200 ctxt->nameNr--;
201 if (ctxt->nameNr < 0)
203 if (ctxt->nameNr > 0)
204 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
206 ctxt->name = NULL;
207 ret = ctxt->nameTab[ctxt->nameNr];
208 ctxt->nameTab[ctxt->nameNr] = NULL;
214 * @ctxt: an HTML parser context
222 htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)
224 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
225 if (ctxt->nodeInfoMax == 0)
226 ctxt->nodeInfoMax = 5;
227 ctxt->nodeInfoMax *= 2;
228 ctxt->nodeInfoTab = (htmlParserNodeInfo *)
229 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
230 ctxt->nodeInfoMax *
231 sizeof(ctxt->nodeInfoTab[0]));
232 if (ctxt->nodeInfoTab == NULL) {
233 htmlErrMemory(ctxt, NULL);
237 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;
238 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
239 return (ctxt->nodeInfoNr++);
244 * @ctxt: an HTML parser context
251 htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
253 if (ctxt->nodeInfoNr <= 0)
255 ctxt->nodeInfoNr--;
256 if (ctxt->nodeInfoNr < 0)
258 if (ctxt->nodeInfoNr > 0)
259 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
261 ctxt->nodeInfo = NULL;
262 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
294 #define UPPER (toupper(*ctxt->input->cur))
296 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)
298 #define NXT(val) ctxt->input->cur[(val)]
300 #define UPP(val) (toupper(ctxt->input->cur[(val)]))
302 #define CUR_PTR ctxt->input->cur
304 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
305 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
306 xmlParserInputShrink(ctxt->input)
308 #define GROW if ((ctxt->progressive == 0) && \
309 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
310 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
312 #define CURRENT ((int) (*ctxt->input->cur))
314 #define SKIP_BLANKS htmlSkipBlankChars(ctxt)
318 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
319 #define CUR ((int) (*ctxt->input->cur))
320 #define NEXT xmlNextChar(ctxt)
322 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
326 if (*(ctxt->input->cur) == '\n') { \
327 ctxt->input->line++; ctxt->input->col = 1; \
328 } else ctxt->input->col++; \
329 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
334 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
335 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
338 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
339 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
360 htmlFindEncoding(xmlParserCtxtPtr ctxt) {
363 if ((ctxt == NULL) || (ctxt->input == NULL) ||
364 (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) ||
365 (ctxt->input->buf->encoder != NULL))
367 if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL))
370 start = ctxt->input->cur;
371 end = ctxt->input->end;
399 * @ctxt: the HTML parser context
412 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
413 if (ctxt->instate == XML_PARSER_EOF)
416 if (ctxt->token != 0) {
418 return(ctxt->token);
420 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
432 const unsigned char *cur = ctxt->input->cur;
439 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
440 cur = ctxt->input->cur;
447 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
448 cur = ctxt->input->cur;
454 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
455 cur = ctxt->input->cur;
480 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
485 if ((*ctxt->input->cur == 0) &&
486 (ctxt->input->cur < ctxt->input->end)) {
487 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
494 return((int) *ctxt->input->cur);
503 if ((int) *ctxt->input->cur < 0x80)
504 return((int) *ctxt->input->cur);
513 guess = htmlFindEncoding(ctxt);
515 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
517 if (ctxt->input->encoding != NULL)
518 xmlFree((xmlChar *) ctxt->input->encoding);
519 ctxt->input->encoding = guess;
522 xmlSwitchToEncoding(ctxt, handler);
524 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
528 ctxt->charset = XML_CHAR_ENCODING_UTF8;
531 return(xmlCurrentChar(ctxt, len));
544 if (ctxt->input->end - ctxt->input->cur >= 4) {
546 ctxt->input->cur[0], ctxt->input->cur[1],
547 ctxt->input->cur[2], ctxt->input->cur[3]);
549 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
551 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
556 ctxt->charset = XML_CHAR_ENCODING_8859_1;
558 return((int) *ctxt->input->cur);
563 * @ctxt: the HTML parser context
571 htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
574 while (IS_BLANK_CH(*(ctxt->input->cur))) {
575 if ((*ctxt->input->cur == 0) &&
576 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
577 xmlPopInput(ctxt);
579 if (*(ctxt->input->cur) == '\n') {
580 ctxt->input->line++; ctxt->input->col = 1;
581 } else ctxt->input->col++;
582 ctxt->input->cur++;
583 ctxt->nbChars++;
584 if (*ctxt->input->cur == 0)
585 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1285 * @ctxt: an HTML parser context
1292 htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1299 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1301 if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1309 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1315 while (!xmlStrEqual(newtag, ctxt->name)) {
1316 info = htmlTagLookup(ctxt->name);
1318 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1320 newtag, ctxt->name);
1322 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1323 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1324 htmlnamePop(ctxt);
1330 * @ctxt: an HTML parser context
1335 htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1339 if (ctxt->nameNr == 0)
1341 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1342 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1343 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1344 htmlnamePop(ctxt);
1350 * @ctxt: an HTML parser context
1361 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1363 while ((newtag != NULL) && (ctxt->name != NULL) &&
1364 (htmlCheckAutoClose(newtag, ctxt->name))) {
1365 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1366 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1367 htmlnamePop(ctxt);
1370 htmlAutoCloseOnEnd(ctxt);
1373 while ((newtag == NULL) && (ctxt->name != NULL) &&
1374 ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
1375 (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
1376 (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
1377 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1378 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1379 htmlnamePop(ctxt);
1437 * @ctxt: an HTML parser context
1445 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1448 if (ctxt->options & HTML_PARSE_NOIMPLIED)
1454 if (ctxt->nameNr <= 0) {
1455 htmlnamePush(ctxt, BAD_CAST"html");
1456 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1457 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1461 if ((ctxt->nameNr <= 1) &&
1468 if (ctxt->html >= 3) {
1476 htmlnamePush(ctxt, BAD_CAST"head");
1477 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1478 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1482 if (ctxt->html >= 10) {
1486 for (i = 0;i < ctxt->nameNr;i++) {
1487 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1490 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1495 htmlnamePush(ctxt, BAD_CAST"body");
1496 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1497 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1503 * @ctxt: an HTML parser context
1513 htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1517 if (ctxt == NULL)
1519 tag = ctxt->name;
1521 htmlAutoClose(ctxt, BAD_CAST"p");
1522 htmlCheckImplied(ctxt, BAD_CAST"p");
1523 htmlnamePush(ctxt, BAD_CAST"p");
1524 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1525 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1532 htmlAutoClose(ctxt, BAD_CAST"p");
1533 htmlCheckImplied(ctxt, BAD_CAST"p");
1534 htmlnamePush(ctxt, BAD_CAST"p");
1535 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1536 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1875 htmlErrMemory(ctxt, "growing buffer\n"); \
2140 * @ctxt: an HTML parser context
2146 htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2151 htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
2193 * @ctxt: an HTML parser context
2202 static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2213 if (ctxt->name == NULL)
2215 if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2217 if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2221 if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2222 dtd = xmlGetIntSubset(ctxt->myDoc);
2230 if (ctxt->node == NULL) return(0);
2231 lastChild = xmlGetLastChild(ctxt->node);
2235 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2236 (ctxt->node->content != NULL)) return(0);
2240 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2337 static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2341 * @ctxt: an HTML parser context
2350 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2368 return(xmlDictLookup(ctxt->dict, loc, i));
2374 * @ctxt: an HTML parser context
2384 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2399 return(xmlDictLookup(ctxt->dict, loc, i));
2405 * @ctxt: an HTML parser context
2413 htmlParseName(htmlParserCtxtPtr ctxt) {
2423 in = ctxt->input->cur;
2435 count = in - ctxt->input->cur;
2436 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2437 ctxt->input->cur = in;
2438 ctxt->nbChars += count;
2439 ctxt->input->col += count;
2443 return(htmlParseNameComplex(ctxt));
2447 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2477 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2483 * @ctxt: an HTML parser context
2493 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2507 htmlErrMemory(ctxt, "buffer allocation failed\n");
2523 c = htmlParseCharRef(ctxt);
2544 ent = htmlParseEntityRef(ctxt, &name);
2622 * @ctxt: an HTML parser context
2633 htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2638 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
2642 name = htmlParseName(ctxt);
2644 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2659 htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2672 * @ctxt: an HTML parser context
2677 * asked for ctxt->replaceEntities != 0
2683 htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2688 ret = htmlParseHTMLAttribute(ctxt, '"');
2690 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2696 ret = htmlParseHTMLAttribute(ctxt, '\'');
2698 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2706 ret = htmlParseHTMLAttribute(ctxt, 0);
2708 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2717 * @ctxt: an HTML parser context
2727 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2737 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2749 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2756 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2765 * @ctxt: an HTML parser context
2775 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2786 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2798 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2805 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2814 * @ctxt: an HTML parser context
2834 htmlParseScript(htmlParserCtxtPtr ctxt) {
2854 if (ctxt->recovery) {
2855 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
2856 xmlStrlen(ctxt->name)) == 0)
2860 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
2862 ctxt->name, NULL);
2874 if (ctxt->sax->cdataBlock!= NULL) {
2878 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2879 } else if (ctxt->sax->characters != NULL) {
2880 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2889 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
2890 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2892 if (ctxt->input->cur < ctxt->input->end) {
2897 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2898 if (ctxt->sax->cdataBlock!= NULL) {
2902 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2903 } else if (ctxt->sax->characters != NULL) {
2904 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2912 * @ctxt: an HTML parser context
2921 htmlParseCharData(htmlParserCtxtPtr ctxt) {
2929 while (((cur != '<') || (ctxt->token == '<')) &&
2930 ((cur != '&') || (ctxt->token == '&')) &&
2933 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2942 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2943 if (areBlanks(ctxt, buf, nbchar)) {
2944 if (ctxt->sax->ignorableWhitespace != NULL)
2945 ctxt->sax->ignorableWhitespace(ctxt->userData,
2948 htmlCheckParagraph(ctxt);
2949 if (ctxt->sax->characters != NULL)
2950 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2975 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2976 if (areBlanks(ctxt, buf, nbchar)) {
2977 if (ctxt->sax->ignorableWhitespace != NULL)
2978 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2980 htmlCheckParagraph(ctxt);
2981 if (ctxt->sax->characters != NULL)
2982 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2990 ctxt->instate = XML_PARSER_EOF;
2996 * @ctxt: an HTML parser context
3012 htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
3020 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3024 URI = htmlParseSystemLiteral(ctxt);
3026 htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
3034 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3038 *publicID = htmlParsePubidLiteral(ctxt);
3040 htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
3046 URI = htmlParseSystemLiteral(ctxt);
3054 * @ctxt: an XML parser context
3061 htmlParsePI(htmlParserCtxtPtr ctxt) {
3071 state = ctxt->instate;
3072 ctxt->instate = XML_PARSER_PI;
3083 target = htmlParseName(ctxt);
3091 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3092 (ctxt->sax->processingInstruction != NULL))
3093 ctxt->sax->processingInstruction(ctxt->userData,
3095 ctxt->instate = state;
3100 htmlErrMemory(ctxt, NULL);
3101 ctxt->instate = state;
3106 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3118 htmlErrMemory(ctxt, NULL);
3120 ctxt->instate = state;
3141 htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
3149 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3150 (ctxt->sax->processingInstruction != NULL))
3151 ctxt->sax->processingInstruction(ctxt->userData,
3156 htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
3159 ctxt->instate = state;
3165 * @ctxt: an HTML parser context
3172 htmlParseComment(htmlParserCtxtPtr ctxt) {
3187 state = ctxt->instate;
3188 ctxt->instate = XML_PARSER_COMMENT;
3193 htmlErrMemory(ctxt, "buffer allocation failed\n");
3194 ctxt->instate = state;
3213 htmlErrMemory(ctxt, "growing buffer failed\n");
3214 ctxt->instate = state;
3234 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3239 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3240 (!ctxt->disableSAX))
3241 ctxt->sax->comment(ctxt->userData, buf);
3244 ctxt->instate = state;
3249 * @ctxt: an HTML parser context
3259 htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3262 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3263 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3279 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3294 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3304 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3313 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3323 * @ctxt: an HTML parser context
3332 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3347 name = htmlParseName(ctxt);
3349 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3362 URI = htmlParseExternalID(ctxt, &ExternalID);
3369 htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
3378 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3379 (!ctxt->disableSAX))
3380 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3391 * @ctxt: an HTML parser context
3411 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3416 name = htmlParseHTMLName(ctxt);
3418 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3430 val = htmlParseAttValue(ctxt);
3439 * @ctxt: an HTML parser context
3448 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3451 if ((ctxt == NULL) || (attvalue == NULL) ||
3452 (ctxt->options & HTML_PARSE_IGNORE_ENC))
3456 if (ctxt->input->encoding != NULL)
3473 if (ctxt->input->encoding != NULL)
3474 xmlFree((xmlChar *) ctxt->input->encoding);
3475 ctxt->input->encoding = xmlStrdup(encoding);
3486 (ctxt->input->buf != NULL) &&
3487 (ctxt->input->buf->encoder == NULL)) {
3488 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3492 xmlSwitchEncoding(ctxt, enc);
3494 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3501 xmlSwitchToEncoding(ctxt, handler);
3502 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3504 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
3510 if ((ctxt->input->buf != NULL) &&
3511 (ctxt->input->buf->encoder != NULL) &&
3512 (ctxt->input->buf->raw != NULL) &&
3513 (ctxt->input->buf->buffer != NULL)) {
3520 processed = ctxt->input->cur - ctxt->input->base;
3521 xmlBufferShrink(ctxt->input->buf->buffer, processed);
3522 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
3523 ctxt->input->buf->buffer,
3524 ctxt->input->buf->raw);
3526 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3530 ctxt->input->base =
3531 ctxt->input->cur = ctxt->input->buf->buffer->content;
3532 ctxt->input->end =
3533 &ctxt->input->base[ctxt->input->buf->buffer->use];
3540 * @ctxt: an HTML parser context
3546 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3552 if ((ctxt == NULL) || (atts == NULL))
3567 htmlCheckEncoding(ctxt, content);
3573 * @ctxt: an HTML parser context
3592 htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3603 if (ctxt->instate == XML_PARSER_EOF)
3605 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3606 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3613 atts = ctxt->atts;
3614 maxatts = ctxt->maxatts;
3617 name = htmlParseHTMLName(ctxt);
3619 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3624 (ctxt->instate != XML_PARSER_EOF))
3634 htmlAutoClose(ctxt, name);
3639 htmlCheckImplied(ctxt, name);
3645 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
3646 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3650 ctxt->depth++;
3652 if ((ctxt->nameNr != 1) &&
3654 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3658 ctxt->depth++;
3662 for (indx = 0;indx < ctxt->nameNr;indx++) {
3663 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
3664 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3668 ctxt->depth++;
3682 long cons = ctxt->nbChars;
3685 attname = htmlParseAttribute(ctxt, &attvalue);
3693 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
3709 htmlErrMemory(ctxt, NULL);
3714 ctxt->atts = atts;
3715 ctxt->maxatts = maxatts;
3723 htmlErrMemory(ctxt, NULL);
3729 ctxt->atts = atts;
3730 ctxt->maxatts = maxatts;
3750 if (cons == ctxt->nbChars) {
3751 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3762 htmlCheckMeta(ctxt, atts);
3768 htmlnamePush(ctxt, name);
3769 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
3771 ctxt->sax->startElement(ctxt->userData, name, atts);
3773 ctxt->sax->startElement(ctxt->userData, name, NULL);
3789 * @ctxt: an HTML parser context
3803 htmlParseEndTag(htmlParserCtxtPtr ctxt)
3810 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
3816 name = htmlParseHTMLName(ctxt);
3824 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
3826 if (ctxt->recovery) {
3842 if ((ctxt->depth > 0) &&
3846 ctxt->depth--;
3854 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
3855 if (xmlStrEqual(name, ctxt->nameTab[i]))
3859 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3869 htmlAutoCloseOnClose(ctxt, name);
3876 if (!xmlStrEqual(name, ctxt->name)) {
3877 if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
3878 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3880 name, ctxt->name);
3887 oldname = ctxt->name;
3889 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3890 ctxt->sax->endElement(ctxt->userData, name);
3891 htmlnamePop(ctxt);
3903 * @ctxt: an HTML parser context
3910 htmlParseReference(htmlParserCtxtPtr ctxt) {
3920 c = htmlParseCharRef(ctxt);
3934 htmlCheckParagraph(ctxt);
3935 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3936 ctxt->sax->characters(ctxt->userData, out, i);
3938 ent = htmlParseEntityRef(ctxt, &name);
3940 htmlCheckParagraph(ctxt);
3941 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3942 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3946 htmlCheckParagraph(ctxt);
3947 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
3948 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3949 ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
3950 /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
3971 htmlCheckParagraph(ctxt);
3972 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3973 ctxt->sax->characters(ctxt->userData, out, i);
3980 * @ctxt: an HTML parser context
3987 htmlParseContent(htmlParserCtxtPtr ctxt) {
3992 currentNode = xmlStrdup(ctxt->name);
3993 depth = ctxt->nameNr;
3995 long cons = ctxt->nbChars;
3999 if (ctxt->instate == XML_PARSER_EOF)
4006 if (htmlParseEndTag(ctxt) &&
4007 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
4018 name = htmlParseHTMLName_nonInvasive(ctxt);
4020 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4032 if (ctxt->name != NULL) {
4033 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
4034 htmlAutoClose(ctxt, name);
4044 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
4045 (!xmlStrEqual(currentNode, ctxt->name)))
4056 htmlParseScript(ctxt);
4066 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4069 htmlParseDocTypeDecl(ctxt);
4077 htmlParseComment(ctxt);
4084 htmlParsePI(ctxt);
4091 htmlParseElement(ctxt);
4099 htmlParseReference(ctxt);
4106 htmlAutoCloseOnEnd(ctxt);
4114 htmlParseCharData(ctxt);
4117 if (cons == ctxt->nbChars) {
4118 if (ctxt->node != NULL) {
4119 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4133 * @ctxt: an HTML parser context
4144 htmlParseElement(htmlParserCtxtPtr ctxt) {
4153 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4154 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4159 if (ctxt->instate == XML_PARSER_EOF)
4163 if (ctxt->record_info) {
4164 node_info.begin_pos = ctxt->input->consumed +
4165 (CUR_PTR - ctxt->input->base);
4166 node_info.begin_line = ctxt->input->line;
4169 failed = htmlParseStartTag(ctxt);
4170 name = ctxt->name;
4182 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4191 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4192 ctxt->sax->endElement(ctxt->userData, name);
4193 htmlnamePop(ctxt);
4200 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4206 if (xmlStrEqual(name, ctxt->name)) {
4207 nodePop(ctxt);
4208 htmlnamePop(ctxt);
4214 if (ctxt->record_info) {
4215 node_info.end_pos = ctxt->input->consumed +
4216 (CUR_PTR - ctxt->input->base);
4217 node_info.end_line = ctxt->input->line;
4218 node_info.node = ctxt->node;
4219 xmlParserAddNodeInfo(ctxt, &node_info);
4228 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4229 ctxt->sax->endElement(ctxt->userData, name);
4230 htmlnamePop(ctxt);
4237 currentNode = xmlStrdup(ctxt->name);
4238 depth = ctxt->nameNr;
4240 oldptr = ctxt->input->cur;
4241 htmlParseContent(ctxt);
4242 if (oldptr==ctxt->input->cur) break;
4243 if (ctxt->nameNr < depth) break;
4249 if ( currentNode != NULL && ctxt->record_info ) {
4250 node_info.end_pos = ctxt->input->consumed +
4251 (CUR_PTR - ctxt->input->base);
4252 node_info.end_line = ctxt->input->line;
4253 node_info.node = ctxt->node;
4254 xmlParserAddNodeInfo(ctxt, &node_info);
4257 htmlAutoCloseOnEnd(ctxt);
4265 htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
4269 if ( ctxt->node != NULL && ctxt->record_info ) {
4270 ctxt->nodeInfo->end_pos = ctxt->input->consumed +
4271 (CUR_PTR - ctxt->input->base);
4272 ctxt->nodeInfo->end_line = ctxt->input->line;
4273 ctxt->nodeInfo->node = ctxt->node;
4274 xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
4275 htmlNodeInfoPop(ctxt);
4278 htmlAutoCloseOnEnd(ctxt);
4284 * @ctxt: an HTML parser context
4294 htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4300 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4301 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4306 if (ctxt->instate == XML_PARSER_EOF)
4310 if (ctxt->record_info) {
4311 node_info.begin_pos = ctxt->input->consumed +
4312 (CUR_PTR - ctxt->input->base);
4313 node_info.begin_line = ctxt->input->line;
4316 failed = htmlParseStartTag(ctxt);
4317 name = ctxt->name;
4329 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4338 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4339 ctxt->sax->endElement(ctxt->userData, name);
4340 htmlnamePop(ctxt);
4347 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4353 if (xmlStrEqual(name, ctxt->name)) {
4354 nodePop(ctxt);
4355 htmlnamePop(ctxt);
4358 if (ctxt->record_info)
4359 htmlNodeInfoPush(ctxt, &node_info);
4360 htmlParserFinishElementParsing(ctxt);
4368 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4369 ctxt->sax->endElement(ctxt->userData, name);
4370 htmlnamePop(ctxt);
4374 if (ctxt->record_info)
4375 htmlNodeInfoPush(ctxt, &node_info);
4380 * @ctxt: an HTML parser context
4387 htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
4392 currentNode = xmlStrdup(ctxt->name);
4393 depth = ctxt->nameNr;
4395 long cons = ctxt->nbChars;
4399 if (ctxt->instate == XML_PARSER_EOF)
4406 if (htmlParseEndTag(ctxt) &&
4407 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
4411 currentNode = xmlStrdup(ctxt->name);
4412 depth = ctxt->nameNr;
4420 name = htmlParseHTMLName_nonInvasive(ctxt);
4422 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4429 htmlParserFinishElementParsing(ctxt);
4433 currentNode = xmlStrdup(ctxt->name);
4434 depth = ctxt->nameNr;
4438 if (ctxt->name != NULL) {
4439 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
4440 htmlAutoClose(ctxt, name);
4450 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
4451 (!xmlStrEqual(currentNode, ctxt->name)))
4453 htmlParserFinishElementParsing(ctxt);
4456 currentNode = xmlStrdup(ctxt->name);
4457 depth = ctxt->nameNr;
4466 htmlParseScript(ctxt);
4476 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4479 htmlParseDocTypeDecl(ctxt);
4487 htmlParseComment(ctxt);
4494 htmlParsePI(ctxt);
4501 htmlParseElementInternal(ctxt);
4504 currentNode = xmlStrdup(ctxt->name);
4505 depth = ctxt->nameNr;
4513 htmlParseReference(ctxt);
4520 htmlAutoCloseOnEnd(ctxt);
4528 htmlParseCharData(ctxt);
4531 if (cons == ctxt->nbChars) {
4532 if (ctxt->node != NULL) {
4533 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4547 * @ctxt: an HTML parser context
4554 __htmlParseContent(void *ctxt) {
4555 if (ctxt != NULL)
4556 htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
4561 * @ctxt: an HTML parser context
4571 htmlParseDocument(htmlParserCtxtPtr ctxt) {
4580 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4581 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4585 ctxt->html = 1;
4586 ctxt->linenumbers = 1;
4591 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4592 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4594 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
4595 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
4607 xmlSwitchEncoding(ctxt, enc);
4616 htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
4620 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4621 ctxt->sax->startDocument(ctxt->userData);
4630 htmlParseComment(ctxt);
4631 htmlParsePI(ctxt);
4645 htmlParseDocTypeDecl(ctxt);
4655 htmlParseComment(ctxt);
4656 htmlParsePI(ctxt);
4663 htmlParseContentInternal(ctxt);
4669 htmlAutoCloseOnEnd(ctxt);
4675 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4676 ctxt->sax->endDocument(ctxt->userData);
4678 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
4679 dtd = xmlGetIntSubset(ctxt->myDoc);
4681 ctxt->myDoc->intSubset =
4682 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4686 if (! ctxt->wellFormed) return(-1);
4699 * @ctxt: an HTML parser context
4707 htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
4711 if (ctxt == NULL) return(-1);
4712 memset(ctxt, 0, sizeof(htmlParserCtxt));
4714 ctxt->dict = xmlDictCreate();
4715 if (ctxt->dict == NULL) {
4728 ctxt->inputTab = (htmlParserInputPtr *)
4730 if (ctxt->inputTab == NULL) {
4732 ctxt->inputNr = 0;
4733 ctxt->inputMax = 0;
4734 ctxt->input = NULL;
4737 ctxt->inputNr = 0;
4738 ctxt->inputMax = 5;
4739 ctxt->input = NULL;
4740 ctxt->version = NULL;
4741 ctxt->encoding = NULL;
4742 ctxt->standalone = -1;
4743 ctxt->instate = XML_PARSER_START;
4746 ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
4747 if (ctxt->nodeTab == NULL) {
4749 ctxt->nodeNr = 0;
4750 ctxt->nodeMax = 0;
4751 ctxt->node = NULL;
4752 ctxt->inputNr = 0;
4753 ctxt->inputMax = 0;
4754 ctxt->input = NULL;
4757 ctxt->nodeNr = 0;
4758 ctxt->nodeMax = 10;
4759 ctxt->node = NULL;
4762 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
4763 if (ctxt->nameTab == NULL) {
4765 ctxt->nameNr = 0;
4766 ctxt->nameMax = 0;
4767 ctxt->name = NULL;
4768 ctxt->nodeNr = 0;
4769 ctxt->nodeMax = 0;
4770 ctxt->node = NULL;
4771 ctxt->inputNr = 0;
4772 ctxt->inputMax = 0;
4773 ctxt->input = NULL;
4776 ctxt->nameNr = 0;
4777 ctxt->nameMax = 10;
4778 ctxt->name = NULL;
4780 ctxt->nodeInfoTab = NULL;
4781 ctxt->nodeInfoNr = 0;
4782 ctxt->nodeInfoMax = 0;
4784 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
4786 ctxt->sax = sax;
4789 ctxt->userData = ctxt;
4790 ctxt->myDoc = NULL;
4791 ctxt->wellFormed = 1;
4792 ctxt->replaceEntities = 0;
4793 ctxt->linenumbers = xmlLineNumbersDefaultValue;
4794 ctxt->html = 1;
4795 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
4796 ctxt->vctxt.userData = ctxt;
4797 ctxt->vctxt.error = xmlParserValidityError;
4798 ctxt->vctxt.warning = xmlParserValidityWarning;
4799 ctxt->record_info = 0;
4800 ctxt->validate = 0;
4801 ctxt->nbChars = 0;
4802 ctxt->checkIndex = 0;
4803 ctxt->catalogs = NULL;
4804 xmlInitNodeInfoSeq(&ctxt->node_seq);
4810 * @ctxt: an HTML parser context
4813 * document in ctxt->myDoc is not freed.
4817 htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
4819 xmlFreeParserCtxt(ctxt);
4833 xmlParserCtxtPtr ctxt;
4835 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
4836 if (ctxt == NULL) {
4840 memset(ctxt, 0, sizeof(xmlParserCtxt));
4841 if (htmlInitParserCtxt(ctxt) < 0) {
4842 htmlFreeParserCtxt(ctxt);
4845 return(ctxt);
4859 xmlParserCtxtPtr ctxt;
4868 ctxt = htmlNewParserCtxt();
4869 if (ctxt == NULL)
4875 input = xmlNewInputStream(ctxt);
4877 xmlFreeParserCtxt(ctxt);
4887 inputPush(ctxt, input);
4888 return(ctxt);
4905 htmlParserCtxtPtr ctxt;
4910 ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
4911 if (ctxt == NULL)
4918 if (ctxt->input->encoding != NULL)
4919 xmlFree((xmlChar *) ctxt->input->encoding);
4920 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
4927 xmlSwitchEncoding(ctxt, enc);
4928 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4929 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4939 xmlSwitchToEncoding(ctxt, handler);
4941 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4947 return(ctxt);
4959 * @ctxt: an HTML parser context
4967 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
4976 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
4987 in = ctxt->input;
4995 if (ctxt->checkIndex > base)
4996 base = ctxt->checkIndex;
5054 ctxt->checkIndex = 0;
5073 ctxt->checkIndex = base;
5091 * @ctxt: an HTML parser context
5097 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
5105 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
5114 in = ctxt->input;
5122 if (ctxt->checkIndex > base)
5123 base = ctxt->checkIndex;
5154 ctxt->checkIndex = 0;
5159 ctxt->checkIndex = base;
5165 * @ctxt: an HTML parser context
5173 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
5180 switch (ctxt->instate) {
5234 in = ctxt->input;
5241 htmlAutoCloseOnEnd(ctxt);
5242 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5246 ctxt->instate = XML_PARSER_EOF;
5247 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5248 ctxt->sax->endDocument(ctxt->userData);
5259 switch (ctxt->instate) {
5277 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5278 ctxt->sax->setDocumentLocator(ctxt->userData,
5280 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5281 (!ctxt->disableSAX))
5282 ctxt->sax->startDocument(ctxt->userData);
5292 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5298 htmlParseDocTypeDecl(ctxt);
5299 ctxt->instate = XML_PARSER_PROLOG;
5305 ctxt->instate = XML_PARSER_MISC;
5325 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5331 htmlParseComment(ctxt);
5332 ctxt->instate = XML_PARSER_MISC;
5335 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5341 htmlParsePI(ctxt);
5342 ctxt->instate = XML_PARSER_MISC;
5349 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5355 htmlParseDocTypeDecl(ctxt);
5356 ctxt->instate = XML_PARSER_PROLOG;
5365 ctxt->instate = XML_PARSER_START_TAG;
5385 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5391 htmlParseComment(ctxt);
5392 ctxt->instate = XML_PARSER_PROLOG;
5395 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5401 htmlParsePI(ctxt);
5402 ctxt->instate = XML_PARSER_PROLOG;
5407 ctxt->instate = XML_PARSER_START_TAG;
5423 htmlParseCharData(ctxt);
5432 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5438 htmlParseComment(ctxt);
5439 ctxt->instate = XML_PARSER_EPILOG;
5442 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5448 htmlParsePI(ctxt);
5449 ctxt->instate = XML_PARSER_EPILOG;
5454 ctxt->errNo = XML_ERR_DOCUMENT_END;
5455 ctxt->wellFormed = 0;
5456 ctxt->instate = XML_PARSER_EOF;
5461 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5462 ctxt->sax->endDocument(ctxt->userData);
5475 ctxt->instate = XML_PARSER_CONTENT;
5483 ctxt->instate = XML_PARSER_END_TAG;
5484 ctxt->checkIndex = 0;
5492 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5495 failed = htmlParseStartTag(ctxt);
5496 name = ctxt->name;
5509 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
5518 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5519 ctxt->sax->endElement(ctxt->userData, name);
5520 htmlnamePop(ctxt);
5521 ctxt->instate = XML_PARSER_CONTENT;
5532 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
5539 if (xmlStrEqual(name, ctxt->name)) {
5540 nodePop(ctxt);
5541 htmlnamePop(ctxt);
5544 ctxt->instate = XML_PARSER_CONTENT;
5556 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5557 ctxt->sax->endElement(ctxt->userData, name);
5558 htmlnamePop(ctxt);
5560 ctxt->instate = XML_PARSER_CONTENT;
5572 if (ctxt->token != 0) {
5575 chr[0] = (xmlChar) ctxt->token;
5576 htmlCheckParagraph(ctxt);
5577 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5578 ctxt->sax->characters(ctxt->userData, chr, 1);
5579 ctxt->token = 0;
5580 ctxt->checkIndex = 0;
5585 if (ctxt->sax != NULL) {
5587 if (ctxt->sax->ignorableWhitespace != NULL)
5588 ctxt->sax->ignorableWhitespace(
5589 ctxt->userData, &cur, 1);
5591 htmlCheckParagraph(ctxt);
5592 if (ctxt->sax->characters != NULL)
5593 ctxt->sax->characters(
5594 ctxt->userData, &cur, 1);
5597 ctxt->token = 0;
5598 ctxt->checkIndex = 0;
5607 cons = ctxt->nbChars;
5608 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
5609 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
5617 idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0);
5624 htmlParseScript(ctxt);
5626 ctxt->instate = XML_PARSER_END_TAG;
5627 ctxt->checkIndex = 0;
5644 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5646 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
5649 htmlParseDocTypeDecl(ctxt);
5654 ctxt, '-', '-', '>', 1, 1) < 0))
5660 htmlParseComment(ctxt);
5661 ctxt->instate = XML_PARSER_CONTENT;
5664 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5670 htmlParsePI(ctxt);
5671 ctxt->instate = XML_PARSER_CONTENT;
5675 ctxt->instate = XML_PARSER_END_TAG;
5676 ctxt->checkIndex = 0;
5683 ctxt->instate = XML_PARSER_START_TAG;
5684 ctxt->checkIndex = 0;
5692 (htmlParseLookupChars(ctxt,
5700 htmlParseReference(ctxt);
5709 (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))
5711 ctxt->checkIndex = 0;
5716 htmlParseCharData(ctxt);
5719 if (cons == ctxt->nbChars) {
5720 if (ctxt->node != NULL) {
5721 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5735 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5737 htmlParseEndTag(ctxt);
5738 if (ctxt->nameNr == 0) {
5739 ctxt->instate = XML_PARSER_EPILOG;
5741 ctxt->instate = XML_PARSER_CONTENT;
5743 ctxt->checkIndex = 0;
5750 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5753 ctxt->instate = XML_PARSER_CONTENT;
5754 ctxt->checkIndex = 0;
5761 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5764 ctxt->instate = XML_PARSER_CONTENT;
5765 ctxt->checkIndex = 0;
5772 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5775 ctxt->instate = XML_PARSER_CONTENT;
5776 ctxt->checkIndex = 0;
5783 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5786 ctxt->instate = XML_PARSER_CONTENT;
5787 ctxt->checkIndex = 0;
5794 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5797 ctxt->instate = XML_PARSER_CONTENT;
5798 ctxt->checkIndex = 0;
5805 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5808 ctxt->instate = XML_PARSER_CONTENT;
5809 ctxt->checkIndex = 0;
5816 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5819 ctxt->instate = XML_PARSER_START_TAG;
5820 ctxt->checkIndex = 0;
5827 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5830 ctxt->instate = XML_PARSER_CONTENT;
5831 ctxt->checkIndex = 0;
5838 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5841 ctxt->instate = XML_PARSER_CONTENT;
5842 ctxt->checkIndex = 0;
5849 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5852 ctxt->instate = XML_PARSER_CONTENT;
5853 ctxt->checkIndex = 0;
5864 htmlAutoCloseOnEnd(ctxt);
5865 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5869 ctxt->instate = XML_PARSER_EOF;
5870 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5871 ctxt->sax->endDocument(ctxt->userData);
5874 if ((ctxt->myDoc != NULL) &&
5875 ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5876 (ctxt->instate == XML_PARSER_EPILOG))) {
5878 dtd = xmlGetIntSubset(ctxt->myDoc);
5880 ctxt->myDoc->intSubset =
5881 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5893 * @ctxt: an HTML parser context
5903 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
5905 if ((ctxt == NULL) || (ctxt->input == NULL)) {
5906 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5910 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5911 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
5912 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5913 int cur = ctxt->input->cur - ctxt->input->base;
5916 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5918 ctxt->errNo = XML_PARSER_EOF;
5919 ctxt->disableSAX = 1;
5922 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5923 ctxt->input->cur = ctxt->input->base + cur;
5924 ctxt->input->end =
5925 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5931 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5932 htmlParseTryOrFinish(ctxt, terminate);
5934 } else if (ctxt->instate != XML_PARSER_EOF) {
5935 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
5936 xmlParserInputBufferPtr in = ctxt->input->buf;
5943 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5950 htmlParseTryOrFinish(ctxt, terminate);
5952 if ((ctxt->instate != XML_PARSER_EOF) &&
5953 (ctxt->instate != XML_PARSER_EPILOG) &&
5954 (ctxt->instate != XML_PARSER_MISC)) {
5955 ctxt->errNo = XML_ERR_DOCUMENT_END;
5956 ctxt->wellFormed = 0;
5958 if (ctxt->instate != XML_PARSER_EOF) {
5959 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5960 ctxt->sax->endDocument(ctxt->userData);
5962 ctxt->instate = XML_PARSER_EOF;
5964 return((xmlParserErrors) ctxt->errNo);
5992 htmlParserCtxtPtr ctxt;
6001 ctxt = htmlNewParserCtxt();
6002 if (ctxt == NULL) {
6007 ctxt->charset=XML_CHAR_ENCODING_UTF8;
6009 if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
6010 xmlFree(ctxt->sax);
6011 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
6012 if (ctxt->sax == NULL) {
6014 xmlFree(ctxt);
6017 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
6019 ctxt->userData = user_data;
6022 ctxt->directory = NULL;
6024 ctxt->directory = xmlParserGetDirectory(filename);
6027 inputStream = htmlNewInputStream(ctxt);
6029 ctxt);
6045 inputPush(ctxt, inputStream);
6047 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
6048 (ctxt->input->buf != NULL)) {
6049 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
6050 int cur = ctxt->input->cur - ctxt->input->base;
6052 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
6054 ctxt->input->base = ctxt->input->buf->buffer->content + base;
6055 ctxt->input->cur = ctxt->input->base + cur;
6056 ctxt->input->end =
6057 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
6062 ctxt->progressive = 1;
6064 return(ctxt);
6086 htmlParserCtxtPtr ctxt;
6093 ctxt = htmlCreateDocParserCtxt(cur, encoding);
6094 if (ctxt == NULL) return(NULL);
6096 if (ctxt->sax != NULL) xmlFree (ctxt->sax);
6097 ctxt->sax = sax;
6098 ctxt->userData = userData;
6101 htmlParseDocument(ctxt);
6102 ret = ctxt->myDoc;
6104 ctxt->sax = NULL;
6105 ctxt->userData = NULL;
6107 htmlFreeParserCtxt(ctxt);
6142 htmlParserCtxtPtr ctxt;
6151 ctxt = htmlNewParserCtxt();
6152 if (ctxt == NULL) {
6162 xmlFreeParserCtxt(ctxt);
6166 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
6169 xmlFreeParserCtxt(ctxt);
6173 inputPush(ctxt, inputStream);
6181 htmlCheckEncoding (ctxt, content);
6186 return(ctxt);
6209 htmlParserCtxtPtr ctxt;
6214 ctxt = htmlCreateFileParserCtxt(filename, encoding);
6215 if (ctxt == NULL) return(NULL);
6217 oldsax = ctxt->sax;
6218 ctxt->sax = sax;
6219 ctxt->userData = userData;
6222 htmlParseDocument(ctxt);
6224 ret = ctxt->myDoc;
6226 ctxt->sax = oldsax;
6227 ctxt->userData = NULL;
6229 htmlFreeParserCtxt(ctxt);
6398 * @ctxt: an HTML parser context
6403 htmlCtxtReset(htmlParserCtxtPtr ctxt)
6408 if (ctxt == NULL)
6412 dict = ctxt->dict;
6414 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
6417 ctxt->inputNr = 0;
6418 ctxt->input = NULL;
6420 ctxt->spaceNr = 0;
6421 if (ctxt->spaceTab != NULL) {
6422 ctxt->spaceTab[0] = -1;
6423 ctxt->space = &ctxt->spaceTab[0];
6425 ctxt->space = NULL;
6429 ctxt->nodeNr = 0;
6430 ctxt->node = NULL;
6432 ctxt->nameNr = 0;
6433 ctxt->name = NULL;
6435 DICT_FREE(ctxt->version);
6436 ctxt->version = NULL;
6437 DICT_FREE(ctxt->encoding);
6438 ctxt->encoding = NULL;
6439 DICT_FREE(ctxt->directory);
6440 ctxt->directory = NULL;
6441 DICT_FREE(ctxt->extSubURI);
6442 ctxt->extSubURI = NULL;
6443 DICT_FREE(ctxt->extSubSystem);
6444 ctxt->extSubSystem = NULL;
6445 if (ctxt->myDoc != NULL)
6446 xmlFreeDoc(ctxt->myDoc);
6447 ctxt->myDoc = NULL;
6449 ctxt->standalone = -1;
6450 ctxt->hasExternalSubset = 0;
6451 ctxt->hasPErefs = 0;
6452 ctxt->html = 1;
6453 ctxt->external = 0;
6454 ctxt->instate = XML_PARSER_START;
6455 ctxt->token = 0;
6457 ctxt->wellFormed = 1;
6458 ctxt->nsWellFormed = 1;
6459 ctxt->disableSAX = 0;
6460 ctxt->valid = 1;
6461 ctxt->vctxt.userData = ctxt;
6462 ctxt->vctxt.error = xmlParserValidityError;
6463 ctxt->vctxt.warning = xmlParserValidityWarning;
6464 ctxt->record_info = 0;
6465 ctxt->nbChars = 0;
6466 ctxt->checkIndex = 0;
6467 ctxt->inSubset = 0;
6468 ctxt->errNo = XML_ERR_OK;
6469 ctxt->depth = 0;
6470 ctxt->charset = XML_CHAR_ENCODING_NONE;
6471 ctxt->catalogs = NULL;
6472 xmlInitNodeInfoSeq(&ctxt->node_seq);
6474 if (ctxt->attsDefault != NULL) {
6475 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
6476 ctxt->attsDefault = NULL;
6478 if (ctxt->attsSpecial != NULL) {
6479 xmlHashFree(ctxt->attsSpecial, NULL);
6480 ctxt->attsSpecial = NULL;
6486 * @ctxt: an HTML parser context
6495 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
6497 if (ctxt == NULL)
6501 ctxt->sax->warning = NULL;
6502 ctxt->vctxt.warning = NULL;
6504 ctxt->options |= XML_PARSE_NOWARNING;
6507 ctxt->sax->error = NULL;
6508 ctxt->vctxt.error = NULL;
6509 ctxt->sax->fatalError = NULL;
6511 ctxt->options |= XML_PARSE_NOERROR;
6514 ctxt->pedantic = 1;
6516 ctxt->options |= XML_PARSE_PEDANTIC;
6518 ctxt->pedantic = 0;
6520 ctxt->keepBlanks = 0;
6521 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
6523 ctxt->options |= XML_PARSE_NOBLANKS;
6525 ctxt->keepBlanks = 1;
6527 ctxt->recovery = 1;
6530 ctxt->recovery = 0;
6532 ctxt->options |= HTML_PARSE_COMPACT;
6536 ctxt->options |= XML_PARSE_HUGE;
6540 ctxt->options |= HTML_PARSE_NODEFDTD;
6544 ctxt->options |= HTML_PARSE_IGNORE_ENC;
6547 ctxt->dictNames = 0;
6553 * @ctxt: an HTML parser context
6564 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
6569 htmlCtxtUseOptions(ctxt, options);
6570 ctxt->html = 1;
6576 xmlSwitchToEncoding(ctxt, hdlr);
6577 if (ctxt->input->encoding != NULL)
6578 xmlFree((xmlChar *) ctxt->input->encoding);
6579 ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);
6582 if ((URL != NULL) && (ctxt->input != NULL) &&
6583 (ctxt->input->filename == NULL))
6584 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
6585 htmlParseDocument(ctxt);
6586 ret = ctxt->myDoc;
6587 ctxt->myDoc = NULL;
6589 if ((ctxt->dictNames) &&
6591 (ret->dict == ctxt->dict))
6592 ctxt->dict = NULL;
6593 xmlFreeParserCtxt(ctxt);
6612 htmlParserCtxtPtr ctxt;
6618 ctxt = htmlCreateDocParserCtxt(cur, NULL);
6619 if (ctxt == NULL)
6621 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6637 htmlParserCtxtPtr ctxt;
6640 ctxt = htmlCreateFileParserCtxt(filename, encoding);
6641 if (ctxt == NULL)
6643 return (htmlDoRead(ctxt, NULL, NULL, options, 0));
6661 htmlParserCtxtPtr ctxt;
6664 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6665 if (ctxt == NULL)
6668 if (ctxt->sax != NULL)
6669 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
6670 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6687 htmlParserCtxtPtr ctxt;
6698 ctxt = xmlNewParserCtxt();
6699 if (ctxt == NULL) {
6703 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6706 xmlFreeParserCtxt(ctxt);
6709 inputPush(ctxt, stream);
6710 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6730 htmlParserCtxtPtr ctxt;
6742 ctxt = htmlNewParserCtxt();
6743 if (ctxt == NULL) {
6747 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6750 xmlFreeParserCtxt(ctxt);
6753 inputPush(ctxt, stream);
6754 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6759 * @ctxt: an HTML parser context
6766 * This reuses the existing @ctxt parser context
6771 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
6778 if (ctxt == NULL)
6781 htmlCtxtReset(ctxt);
6783 stream = xmlNewStringInputStream(ctxt, cur);
6787 inputPush(ctxt, stream);
6788 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6793 * @ctxt: an HTML parser context
6799 * This reuses the existing @ctxt parser context
6804 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
6811 if (ctxt == NULL)
6814 htmlCtxtReset(ctxt);
6816 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
6820 inputPush(ctxt, stream);
6821 return (htmlDoRead(ctxt, NULL, encoding, options, 1));
6826 * @ctxt: an HTML parser context
6834 * This reuses the existing @ctxt parser context
6839 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
6845 if (ctxt == NULL)
6850 htmlCtxtReset(ctxt);
6857 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6863 inputPush(ctxt, stream);
6864 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6869 * @ctxt: an HTML parser context
6876 * This reuses the existing @ctxt parser context
6881 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
6889 if (ctxt == NULL)
6892 htmlCtxtReset(ctxt);
6898 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6903 inputPush(ctxt, stream);
6904 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6909 * @ctxt: an HTML parser context
6918 * This reuses the existing @ctxt parser context
6923 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
6933 if (ctxt == NULL)
6936 htmlCtxtReset(ctxt);
6942 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6947 inputPush(ctxt, stream);
6948 return (htmlDoRead(ctxt, URL, encoding, options, 1));