Home | History | Annotate | Download | only in libxml2

Lines Matching refs:ctxt

56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
58 static void htmlParseComment(htmlParserCtxtPtr ctxt);
68 * @ctxt: an HTML parser context
74 htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
76 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
77 (ctxt->instate == XML_PARSER_EOF))
79 if (ctxt != NULL) {
80 ctxt->errNo = XML_ERR_NO_MEMORY;
81 ctxt->instate = XML_PARSER_EOF;
82 ctxt->disableSAX = 1;
85 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
90 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
97 * @ctxt: an HTML parser context
106 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
109 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
110 (ctxt->instate == XML_PARSER_EOF))
112 if (ctxt != NULL)
113 ctxt->errNo = error;
114 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
119 if (ctxt != NULL)
120 ctxt->wellFormed = 0;
125 * @ctxt: an HTML parser context
133 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
136 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
137 (ctxt->instate == XML_PARSER_EOF))
139 if (ctxt != NULL)
140 ctxt->errNo = error;
141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
144 if (ctxt != NULL)
145 ctxt->wellFormed = 0;
156 * @ctxt: an HTML parser context
164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
166 if (ctxt->nameNr >= ctxt->nameMax) {
167 ctxt->nameMax *= 2;
168 ctxt->nameTab = (const xmlChar * *)
169 xmlRealloc((xmlChar * *)ctxt->nameTab,
170 ctxt->nameMax *
171 sizeof(ctxt->nameTab[0]));
172 if (ctxt->nameTab == NULL) {
173 htmlErrMemory(ctxt, NULL);
177 ctxt->nameTab[ctxt->nameNr] = value;
178 ctxt->name = value;
179 return (ctxt->nameNr++);
183 * @ctxt: an HTML parser context
190 htmlnamePop(htmlParserCtxtPtr ctxt)
194 if (ctxt->nameNr <= 0)
196 ctxt->nameNr--;
197 if (ctxt->nameNr < 0)
199 if (ctxt->nameNr > 0)
200 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
202 ctxt->name = NULL;
203 ret = ctxt->nameTab[ctxt->nameNr];
204 ctxt->nameTab[ctxt->nameNr] = NULL;
237 #define UPPER (toupper(*ctxt->input->cur))
239 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)
241 #define NXT(val) ctxt->input->cur[(val)]
243 #define UPP(val) (toupper(ctxt->input->cur[(val)]))
245 #define CUR_PTR ctxt->input->cur
247 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
248 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
249 xmlParserInputShrink(ctxt->input)
251 #define GROW if ((ctxt->progressive == 0) && \
252 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
253 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
255 #define CURRENT ((int) (*ctxt->input->cur))
257 #define SKIP_BLANKS htmlSkipBlankChars(ctxt)
261 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
262 #define CUR ((int) (*ctxt->input->cur))
263 #define NEXT xmlNextChar(ctxt)
265 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
266 #define NXT(val) ctxt->input->cur[(val)]
267 #define CUR_PTR ctxt->input->cur
271 if (*(ctxt->input->cur) == '\n') { \
272 ctxt->input->line++; ctxt->input->col = 1; \
273 } else ctxt->input->col++; \
274 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
279 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
280 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
283 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
284 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
292 * @ctxt: the HTML parser context
305 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
306 if (ctxt->instate == XML_PARSER_EOF)
309 if (ctxt->token != 0) {
311 return(ctxt->token);
313 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
325 const unsigned char *cur = ctxt->input->cur;
332 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
338 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
343 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
367 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
374 return((int) *ctxt->input->cur);
383 if ((int) *ctxt->input->cur < 0x80)
384 return((int) *ctxt->input->cur);
389 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
390 ctxt->charset = XML_CHAR_ENCODING_UTF8;
391 return(xmlCurrentChar(ctxt, len));
404 if (ctxt->input->end - ctxt->input->cur >= 4) {
406 ctxt->input->cur[0], ctxt->input->cur[1],
407 ctxt->input->cur[2], ctxt->input->cur[3]);
409 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
411 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
416 ctxt->charset = XML_CHAR_ENCODING_8859_1;
418 return((int) *ctxt->input->cur);
423 * @ctxt: the HTML parser context
431 htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
434 while (IS_BLANK_CH(*(ctxt->input->cur))) {
435 if ((*ctxt->input->cur == 0) &&
436 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
437 xmlPopInput(ctxt);
439 if (*(ctxt->input->cur) == '\n') {
440 ctxt->input->line++; ctxt->input->col = 1;
441 } else ctxt->input->col++;
442 ctxt->input->cur++;
443 ctxt->nbChars++;
444 if (*ctxt->input->cur == 0)
445 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1145 * @ctxt: an HTML parser context
1152 htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1159 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1161 if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1169 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1175 while (!xmlStrEqual(newtag, ctxt->name)) {
1176 info = htmlTagLookup(ctxt->name);
1178 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1180 newtag, ctxt->name);
1182 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1183 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1184 htmlnamePop(ctxt);
1190 * @ctxt: an HTML parser context
1195 htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1199 if (ctxt->nameNr == 0)
1201 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1202 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1203 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1204 htmlnamePop(ctxt);
1210 * @ctxt: an HTML parser context
1221 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1223 while ((newtag != NULL) && (ctxt->name != NULL) &&
1224 (htmlCheckAutoClose(newtag, ctxt->name))) {
1225 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1226 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1227 htmlnamePop(ctxt);
1230 htmlAutoCloseOnEnd(ctxt);
1233 while ((newtag == NULL) && (ctxt->name != NULL) &&
1234 ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
1235 (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
1236 (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
1237 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1238 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1239 htmlnamePop(ctxt);
1297 * @ctxt: an HTML parser context
1305 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1310 if (ctxt->nameNr <= 0) {
1311 htmlnamePush(ctxt, BAD_CAST"html");
1312 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1313 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1317 if ((ctxt->nameNr <= 1) &&
1328 htmlnamePush(ctxt, BAD_CAST"head");
1329 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1330 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1335 for (i = 0;i < ctxt->nameNr;i++) {
1336 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1339 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1344 htmlnamePush(ctxt, BAD_CAST"body");
1345 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1346 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1352 * @ctxt: an HTML parser context
1362 htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1366 if (ctxt == NULL)
1368 tag = ctxt->name;
1370 htmlAutoClose(ctxt, BAD_CAST"p");
1371 htmlCheckImplied(ctxt, BAD_CAST"p");
1372 htmlnamePush(ctxt, BAD_CAST"p");
1373 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1374 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1381 htmlAutoClose(ctxt, BAD_CAST"p");
1382 htmlCheckImplied(ctxt, BAD_CAST"p");
1383 htmlnamePush(ctxt, BAD_CAST"p");
1384 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1385 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1724 htmlErrMemory(ctxt, "growing buffer\n"); \
1989 * @ctxt: an HTML parser context
1995 htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2000 htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
2042 * @ctxt: an HTML parser context
2051 static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2062 if (ctxt->name == NULL)
2064 if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2066 if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2070 if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2071 dtd = xmlGetIntSubset(ctxt->myDoc);
2079 if (ctxt->node == NULL) return(0);
2080 lastChild = xmlGetLastChild(ctxt->node);
2084 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2085 (ctxt->node->content != NULL)) return(0);
2089 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2186 static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2190 * @ctxt: an HTML parser context
2199 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2216 return(xmlDictLookup(ctxt->dict, loc, i));
2222 * @ctxt: an HTML parser context
2232 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2247 return(xmlDictLookup(ctxt->dict, loc, i));
2253 * @ctxt: an HTML parser context
2261 htmlParseName(htmlParserCtxtPtr ctxt) {
2271 in = ctxt->input->cur;
2283 count = in - ctxt->input->cur;
2284 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2285 ctxt->input->cur = in;
2286 ctxt->nbChars += count;
2287 ctxt->input->col += count;
2291 return(htmlParseNameComplex(ctxt));
2295 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2325 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2331 * @ctxt: an HTML parser context
2341 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2355 htmlErrMemory(ctxt, "buffer allocation failed\n");
2371 c = htmlParseCharRef(ctxt);
2392 ent = htmlParseEntityRef(ctxt, &name);
2470 * @ctxt: an HTML parser context
2481 htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2486 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
2490 name = htmlParseName(ctxt);
2492 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2507 htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2520 * @ctxt: an HTML parser context
2525 * asked for ctxt->replaceEntities != 0
2531 htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2536 ret = htmlParseHTMLAttribute(ctxt, '"');
2538 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2544 ret = htmlParseHTMLAttribute(ctxt, '\'');
2546 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2554 ret = htmlParseHTMLAttribute(ctxt, 0);
2556 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2565 * @ctxt: an HTML parser context
2575 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2585 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2597 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2604 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2613 * @ctxt: an HTML parser context
2623 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2634 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2646 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2653 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2662 * @ctxt: an HTML parser context
2682 htmlParseScript(htmlParserCtxtPtr ctxt) {
2702 if (ctxt->recovery) {
2703 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
2704 xmlStrlen(ctxt->name)) == 0)
2708 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
2710 ctxt->name, NULL);
2722 if (ctxt->sax->cdataBlock!= NULL) {
2726 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2727 } else if (ctxt->sax->characters != NULL) {
2728 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2737 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
2738 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2743 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2744 if (ctxt->sax->cdataBlock!= NULL) {
2748 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2749 } else if (ctxt->sax->characters != NULL) {
2750 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2758 * @ctxt: an HTML parser context
2767 htmlParseCharData(htmlParserCtxtPtr ctxt) {
2775 while (((cur != '<') || (ctxt->token == '<')) &&
2776 ((cur != '&') || (ctxt->token == '&')) &&
2779 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2788 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2789 if (areBlanks(ctxt, buf, nbchar)) {
2790 if (ctxt->sax->ignorableWhitespace != NULL)
2791 ctxt->sax->ignorableWhitespace(ctxt->userData,
2794 htmlCheckParagraph(ctxt);
2795 if (ctxt->sax->characters != NULL)
2796 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2821 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2822 if (areBlanks(ctxt, buf, nbchar)) {
2823 if (ctxt->sax->ignorableWhitespace != NULL)
2824 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2826 htmlCheckParagraph(ctxt);
2827 if (ctxt->sax->characters != NULL)
2828 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2836 ctxt->instate = XML_PARSER_EOF;
2842 * @ctxt: an HTML parser context
2858 htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
2866 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2870 URI = htmlParseSystemLiteral(ctxt);
2872 htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
2880 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2884 *publicID = htmlParsePubidLiteral(ctxt);
2886 htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
2892 URI = htmlParseSystemLiteral(ctxt);
2900 * @ctxt: an XML parser context
2907 htmlParsePI(htmlParserCtxtPtr ctxt) {
2917 state = ctxt->instate;
2918 ctxt->instate = XML_PARSER_PI;
2929 target = htmlParseName(ctxt);
2937 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2938 ctxt->sax->processingInstruction != NULL))
2939 ctxt->sax->processingInstruction(ctxt->userData,
2941 ctxt->instate = state;
2946 htmlErrMemory(ctxt, NULL);
2947 ctxt->instate = state;
2952 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2964 htmlErrMemory(ctxt, NULL);
2966 ctxt->instate = state;
2987 htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
2995 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2996 (ctxt->sax->processingInstruction != NULL))
2997 ctxt->sax->processingInstruction(ctxt->userData,
3002 htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
3005 ctxt->instate = state;
3011 * @ctxt: an HTML parser context
3018 htmlParseComment(htmlParserCtxtPtr ctxt) {
3033 state = ctxt->instate;
3034 ctxt->instate = XML_PARSER_COMMENT;
3039 htmlErrMemory(ctxt, "buffer allocation failed\n");
3040 ctxt->instate = state;
3059 htmlErrMemory(ctxt, "growing buffer failed\n");
3060 ctxt->instate = state;
3080 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3085 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3086 (!ctxt->disableSAX))
3087 ctxt->sax->comment(ctxt->userData, buf);
3090 ctxt->instate = state;
3095 * @ctxt: an HTML parser context
3105 htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3108 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3109 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3125 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3140 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3150 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3159 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3169 * @ctxt: an HTML parser context
3178 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3193 name = htmlParseName(ctxt);
3195 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3208 URI = htmlParseExternalID(ctxt, &ExternalID);
3215 htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
3224 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3225 (!ctxt->disableSAX))
3226 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3237 * @ctxt: an HTML parser context
3257 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3262 name = htmlParseHTMLName(ctxt);
3264 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3276 val = htmlParseAttValue(ctxt);
3290 * @ctxt: an HTML parser context
3299 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3302 if ((ctxt == NULL) || (attvalue == NULL))
3306 if (ctxt->input->encoding != NULL)
3323 if (ctxt->input->encoding != NULL)
3324 xmlFree((xmlChar *) ctxt->input->encoding);
3325 ctxt->input->encoding = xmlStrdup(encoding);
3336 (ctxt->input->buf != NULL) &&
3337 (ctxt->input->buf->encoder == NULL)) {
3338 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3342 xmlSwitchEncoding(ctxt, enc);
3344 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3351 xmlSwitchToEncoding(ctxt, handler);
3352 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3354 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
3358 if ((ctxt->input->buf != NULL) &&
3359 (ctxt->input->buf->encoder != NULL) &&
3360 (ctxt->input->buf->raw != NULL) &&
3361 (ctxt->input->buf->buffer != NULL)) {
3368 processed = ctxt->input->cur - ctxt->input->base;
3369 xmlBufferShrink(ctxt->input->buf->buffer, processed);
3370 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
3371 ctxt->input->buf->buffer,
3372 ctxt->input->buf->raw);
3374 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3378 ctxt->input->base =
3379 ctxt->input->cur = ctxt->input->buf->buffer->content;
3386 * @ctxt: an HTML parser context
3392 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3398 if ((ctxt == NULL) || (atts == NULL))
3413 htmlCheckEncoding(ctxt, content);
3419 * @ctxt: an HTML parser context
3438 htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3449 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3450 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3457 atts = ctxt->atts;
3458 maxatts = ctxt->maxatts;
3461 name = htmlParseHTMLName(ctxt);
3463 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3477 htmlAutoClose(ctxt, name);
3482 htmlCheckImplied(ctxt, name);
3488 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
3489 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3493 ctxt->depth++;
3495 if ((ctxt->nameNr != 1) &&
3497 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3501 ctxt->depth++;
3505 for (indx = 0;indx < ctxt->nameNr;indx++) {
3506 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
3507 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3511 ctxt->depth++;
3525 long cons = ctxt->nbChars;
3528 attname = htmlParseAttribute(ctxt, &attvalue);
3536 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
3552 htmlErrMemory(ctxt, NULL);
3557 ctxt->atts = atts;
3558 ctxt->maxatts = maxatts;
3566 htmlErrMemory(ctxt, NULL);
3572 ctxt->atts = atts;
3573 ctxt->maxatts = maxatts;
3593 if (cons == ctxt->nbChars) {
3594 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3605 htmlCheckMeta(ctxt, atts);
3611 htmlnamePush(ctxt, name);
3612 ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
3614 ctxt->sax->startElement(ctxt->userData, name, atts);
3616 ctxt->sax->startElement(ctxt->userData, name, NULL);
3632 * @ctxt: an HTML parser context
3646 htmlParseEndTag(htmlParserCtxtPtr ctxt)
3653 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
3659 name = htmlParseHTMLName(ctxt);
3667 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
3669 if (ctxt->recovery) {
3685 if ((ctxt->depth > 0) &&
3689 ctxt->depth--;
3697 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
3698 if (xmlStrEqual(name, ctxt->nameTab[i]))
3702 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3712 htmlAutoCloseOnClose(ctxt, name);
3719 if (!xmlStrEqual(name, ctxt->name)) {
3720 if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
3721 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3723 name, ctxt->name);
3730 oldname = ctxt->name;
3732 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3733 ctxt->sax->endElement(ctxt->userData, name);
3734 htmlnamePop(ctxt);
3746 * @ctxt: an HTML parser context
3753 htmlParseReference(htmlParserCtxtPtr ctxt) {
3763 c = htmlParseCharRef(ctxt);
3777 htmlCheckParagraph(ctxt);
3778 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3779 ctxt->sax->characters(ctxt->userData, out, i);
3781 ent = htmlParseEntityRef(ctxt, &name);
3783 htmlCheckParagraph(ctxt);
3784 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3785 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3789 htmlCheckParagraph(ctxt);
3790 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
3791 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3792 ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
3793 /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
3814 htmlCheckParagraph(ctxt);
3815 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3816 ctxt->sax->characters(ctxt->userData, out, i);
3823 * @ctxt: an HTML parser context
3829 htmlParseContent(htmlParserCtxtPtr ctxt) {
3834 currentNode = xmlStrdup(ctxt->name);
3835 depth = ctxt->nameNr;
3837 long cons = ctxt->nbChars;
3844 if (htmlParseEndTag(ctxt) &&
3845 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
3856 name = htmlParseHTMLName_nonInvasive(ctxt);
3858 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3870 if (ctxt->name != NULL) {
3871 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
3872 htmlAutoClose(ctxt, name);
3882 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
3883 (!xmlStrEqual(currentNode, ctxt->name)))
3894 htmlParseScript(ctxt);
3904 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3907 htmlParseDocTypeDecl(ctxt);
3915 htmlParseComment(ctxt);
3922 htmlParsePI(ctxt);
3929 htmlParseElement(ctxt);
3937 htmlParseReference(ctxt);
3944 htmlAutoCloseOnEnd(ctxt);
3952 htmlParseCharData(ctxt);
3955 if (cons == ctxt->nbChars) {
3956 if (ctxt->node != NULL) {
3957 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3971 * @ctxt: an HTML parser context
3977 __htmlParseContent(void *ctxt) {
3978 if (ctxt != NULL)
3979 htmlParseContent((htmlParserCtxtPtr) ctxt);
3984 * @ctxt: an HTML parser context
3994 htmlParseElement(htmlParserCtxtPtr ctxt) {
4003 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4004 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4009 if (ctxt->record_info) {
4010 node_info.begin_pos = ctxt->input->consumed +
4011 (CUR_PTR - ctxt->input->base);
4012 node_info.begin_line = ctxt->input->line;
4015 failed = htmlParseStartTag(ctxt);
4016 name = ctxt->name;
4028 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4037 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4038 ctxt->sax->endElement(ctxt->userData, name);
4039 htmlnamePop(ctxt);
4046 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4052 if (xmlStrEqual(name, ctxt->name)) {
4053 nodePop(ctxt);
4054 htmlnamePop(ctxt);
4060 if (ctxt->record_info) {
4061 node_info.end_pos = ctxt->input->consumed +
4062 (CUR_PTR - ctxt->input->base);
4063 node_info.end_line = ctxt->input->line;
4064 node_info.node = ctxt->node;
4065 xmlParserAddNodeInfo(ctxt, &node_info);
4074 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4075 ctxt->sax->endElement(ctxt->userData, name);
4076 htmlnamePop(ctxt);
4083 currentNode = xmlStrdup(ctxt->name);
4084 depth = ctxt->nameNr;
4086 oldptr = ctxt->input->cur;
4087 htmlParseContent(ctxt);
4088 if (oldptr==ctxt->input->cur) break;
4089 if (ctxt->nameNr < depth) break;
4095 if ( currentNode != NULL && ctxt->record_info ) {
4096 node_info.end_pos = ctxt->input->consumed +
4097 (CUR_PTR - ctxt->input->base);
4098 node_info.end_line = ctxt->input->line;
4099 node_info.node = ctxt->node;
4100 xmlParserAddNodeInfo(ctxt, &node_info);
4103 htmlAutoCloseOnEnd(ctxt);
4112 * @ctxt: an HTML parser context
4122 htmlParseDocument(htmlParserCtxtPtr ctxt) {
4131 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4132 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4136 ctxt->html = 1;
4141 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4142 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4144 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
4145 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
4157 xmlSwitchEncoding(ctxt, enc);
4166 htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
4170 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4171 ctxt->sax->startDocument(ctxt->userData);
4180 htmlParseComment(ctxt);
4181 htmlParsePI(ctxt);
4195 htmlParseDocTypeDecl(ctxt);
4205 htmlParseComment(ctxt);
4206 htmlParsePI(ctxt);
4213 htmlParseContent(ctxt);
4219 htmlAutoCloseOnEnd(ctxt);
4225 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4226 ctxt->sax->endDocument(ctxt->userData);
4228 if (ctxt->myDoc != NULL) {
4229 dtd = xmlGetIntSubset(ctxt->myDoc);
4231 ctxt->myDoc->intSubset =
4232 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4236 if (! ctxt->wellFormed) return(-1);
4249 * @ctxt: an HTML parser context
4257 htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
4261 if (ctxt == NULL) return(-1);
4262 memset(ctxt, 0, sizeof(htmlParserCtxt));
4264 ctxt->dict = xmlDictCreate();
4265 if (ctxt->dict == NULL) {
4278 ctxt->inputTab = (htmlParserInputPtr *)
4280 if (ctxt->inputTab == NULL) {
4282 ctxt->inputNr = 0;
4283 ctxt->inputMax = 0;
4284 ctxt->input = NULL;
4287 ctxt->inputNr = 0;
4288 ctxt->inputMax = 5;
4289 ctxt->input = NULL;
4290 ctxt->version = NULL;
4291 ctxt->encoding = NULL;
4292 ctxt->standalone = -1;
4293 ctxt->instate = XML_PARSER_START;
4296 ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
4297 if (ctxt->nodeTab == NULL) {
4299 ctxt->nodeNr = 0;
4300 ctxt->nodeMax = 0;
4301 ctxt->node = NULL;
4302 ctxt->inputNr = 0;
4303 ctxt->inputMax = 0;
4304 ctxt->input = NULL;
4307 ctxt->nodeNr = 0;
4308 ctxt->nodeMax = 10;
4309 ctxt->node = NULL;
4312 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
4313 if (ctxt->nameTab == NULL) {
4315 ctxt->nameNr = 0;
4316 ctxt->nameMax = 10;
4317 ctxt->name = NULL;
4318 ctxt->nodeNr = 0;
4319 ctxt->nodeMax = 0;
4320 ctxt->node = NULL;
4321 ctxt->inputNr = 0;
4322 ctxt->inputMax = 0;
4323 ctxt->input = NULL;
4326 ctxt->nameNr = 0;
4327 ctxt->nameMax = 10;
4328 ctxt->name = NULL;
4330 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
4332 ctxt->sax = sax;
4335 ctxt->userData = ctxt;
4336 ctxt->myDoc = NULL;
4337 ctxt->wellFormed = 1;
4338 ctxt->replaceEntities = 0;
4339 ctxt->linenumbers = xmlLineNumbersDefaultValue;
4340 ctxt->html = 1;
4341 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
4342 ctxt->vctxt.userData = ctxt;
4343 ctxt->vctxt.error = xmlParserValidityError;
4344 ctxt->vctxt.warning = xmlParserValidityWarning;
4345 ctxt->record_info = 0;
4346 ctxt->validate = 0;
4347 ctxt->nbChars = 0;
4348 ctxt->checkIndex = 0;
4349 ctxt->catalogs = NULL;
4350 xmlInitNodeInfoSeq(&ctxt->node_seq);
4356 * @ctxt: an HTML parser context
4359 * document in ctxt->myDoc is not freed.
4363 htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
4365 xmlFreeParserCtxt(ctxt);
4379 xmlParserCtxtPtr ctxt;
4381 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
4382 if (ctxt == NULL) {
4386 memset(ctxt, 0, sizeof(xmlParserCtxt));
4387 if (htmlInitParserCtxt(ctxt) < 0) {
4388 htmlFreeParserCtxt(ctxt);
4391 return(ctxt);
4405 xmlParserCtxtPtr ctxt;
4414 ctxt = htmlNewParserCtxt();
4415 if (ctxt == NULL)
4421 input = xmlNewInputStream(ctxt);
4423 xmlFreeParserCtxt(ctxt);
4433 inputPush(ctxt, input);
4434 return(ctxt);
4451 htmlParserCtxtPtr ctxt;
4456 ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
4457 if (ctxt == NULL)
4464 if (ctxt->input->encoding != NULL)
4465 xmlFree((xmlChar *) ctxt->input->encoding);
4466 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
4473 xmlSwitchEncoding(ctxt, enc);
4474 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4475 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4485 xmlSwitchToEncoding(ctxt, handler);
4487 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4493 return(ctxt);
4505 * @ctxt: an HTML parser context
4513 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
4522 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
4529 in = ctxt->input;
4533 if (ctxt->checkIndex > base)
4534 base = ctxt->checkIndex;
4571 ctxt->checkIndex = 0;
4589 ctxt->checkIndex = base;
4606 * @ctxt: an HTML parser context
4614 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
4621 switch (ctxt->instate) {
4675 in = ctxt->input;
4682 htmlAutoCloseOnEnd(ctxt);
4683 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
4687 ctxt->instate = XML_PARSER_EOF;
4688 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4689 ctxt->sax->endDocument(ctxt->userData);
4700 switch (ctxt->instate) {
4718 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4719 ctxt->sax->setDocumentLocator(ctxt->userData,
4721 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
4722 (!ctxt->disableSAX))
4723 ctxt->sax->startDocument(ctxt->userData);
4733 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4739 htmlParseDocTypeDecl(ctxt);
4740 ctxt->instate = XML_PARSER_PROLOG;
4746 ctxt->instate = XML_PARSER_MISC;
4766 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4772 htmlParseComment(ctxt);
4773 ctxt->instate = XML_PARSER_MISC;
4776 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4782 htmlParsePI(ctxt);
4783 ctxt->instate = XML_PARSER_MISC;
4790 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4796 htmlParseDocTypeDecl(ctxt);
4797 ctxt->instate = XML_PARSER_PROLOG;
4806 ctxt->instate = XML_PARSER_START_TAG;
4826 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4832 htmlParseComment(ctxt);
4833 ctxt->instate = XML_PARSER_PROLOG;
4836 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4842 htmlParsePI(ctxt);
4843 ctxt->instate = XML_PARSER_PROLOG;
4848 ctxt->instate = XML_PARSER_START_TAG;
4864 htmlParseCharData(ctxt);
4873 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4879 htmlParseComment(ctxt);
4880 ctxt->instate = XML_PARSER_EPILOG;
4883 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4889 htmlParsePI(ctxt);
4890 ctxt->instate = XML_PARSER_EPILOG;
4895 ctxt->errNo = XML_ERR_DOCUMENT_END;
4896 ctxt->wellFormed = 0;
4897 ctxt->instate = XML_PARSER_EOF;
4902 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4903 ctxt->sax->endDocument(ctxt->userData);
4916 ctxt->instate = XML_PARSER_CONTENT;
4924 ctxt->instate = XML_PARSER_END_TAG;
4925 ctxt->checkIndex = 0;
4933 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4936 failed = htmlParseStartTag(ctxt);
4937 name = ctxt->name;
4950 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4959 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4960 ctxt->sax->endElement(ctxt->userData, name);
4961 htmlnamePop(ctxt);
4962 ctxt->instate = XML_PARSER_CONTENT;
4973 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4980 if (xmlStrEqual(name, ctxt->name)) {
4981 nodePop(ctxt);
4982 htmlnamePop(ctxt);
4985 ctxt->instate = XML_PARSER_CONTENT;
4997 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4998 ctxt->sax->endElement(ctxt->userData, name);
4999 htmlnamePop(ctxt);
5001 ctxt->instate = XML_PARSER_CONTENT;
5013 if (ctxt->token != 0) {
5016 chr[0] = (xmlChar) ctxt->token;
5017 htmlCheckParagraph(ctxt);
5018 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5019 ctxt->sax->characters(ctxt->userData, chr, 1);
5020 ctxt->token = 0;
5021 ctxt->checkIndex = 0;
5026 if (ctxt->sax != NULL) {
5028 if (ctxt->sax->ignorableWhitespace != NULL)
5029 ctxt->sax->ignorableWhitespace(
5030 ctxt->userData, &cur, 1);
5032 htmlCheckParagraph(ctxt);
5033 if (ctxt->sax->characters != NULL)
5034 ctxt->sax->characters(
5035 ctxt->userData, &cur, 1);
5038 ctxt->token = 0;
5039 ctxt->checkIndex = 0;
5048 cons = ctxt->nbChars;
5049 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
5050 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
5058 idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0);
5065 htmlParseScript(ctxt);
5067 ctxt->instate = XML_PARSER_END_TAG;
5068 ctxt->checkIndex = 0;
5085 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5087 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
5090 htmlParseDocTypeDecl(ctxt);
5095 ctxt, '-', '-', '>', 1) < 0))
5101 htmlParseComment(ctxt);
5102 ctxt->instate = XML_PARSER_CONTENT;
5105 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5111 htmlParsePI(ctxt);
5112 ctxt->instate = XML_PARSER_CONTENT;
5116 ctxt->instate = XML_PARSER_END_TAG;
5117 ctxt->checkIndex = 0;
5124 ctxt->instate = XML_PARSER_START_TAG;
5125 ctxt->checkIndex = 0;
5133 (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0))
5140 htmlParseReference(ctxt);
5149 (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
5151 ctxt->checkIndex = 0;
5156 htmlParseCharData(ctxt);
5159 if (cons == ctxt->nbChars) {
5160 if (ctxt->node != NULL) {
5161 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5175 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5177 htmlParseEndTag(ctxt);
5178 if (ctxt->nameNr == 0) {
5179 ctxt->instate = XML_PARSER_EPILOG;
5181 ctxt->instate = XML_PARSER_CONTENT;
5183 ctxt->checkIndex = 0;
5190 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5193 ctxt->instate = XML_PARSER_CONTENT;
5194 ctxt->checkIndex = 0;
5201 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5204 ctxt->instate = XML_PARSER_CONTENT;
5205 ctxt->checkIndex = 0;
5212 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5215 ctxt->instate = XML_PARSER_CONTENT;
5216 ctxt->checkIndex = 0;
5223 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5226 ctxt->instate = XML_PARSER_CONTENT;
5227 ctxt->checkIndex = 0;
5234 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5237 ctxt->instate = XML_PARSER_CONTENT;
5238 ctxt->checkIndex = 0;
5245 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5248 ctxt->instate = XML_PARSER_CONTENT;
5249 ctxt->checkIndex = 0;
5256 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5259 ctxt->instate = XML_PARSER_START_TAG;
5260 ctxt->checkIndex = 0;
5267 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5270 ctxt->instate = XML_PARSER_CONTENT;
5271 ctxt->checkIndex = 0;
5278 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5281 ctxt->instate = XML_PARSER_CONTENT;
5282 ctxt->checkIndex = 0;
5289 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5292 ctxt->instate = XML_PARSER_CONTENT;
5293 ctxt->checkIndex = 0;
5304 htmlAutoCloseOnEnd(ctxt);
5305 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5309 ctxt->instate = XML_PARSER_EOF;
5310 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5311 ctxt->sax->endDocument(ctxt->userData);
5314 if ((ctxt->myDoc != NULL) &&
5315 ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5316 (ctxt->instate == XML_PARSER_EPILOG))) {
5318 dtd = xmlGetIntSubset(ctxt->myDoc);
5320 ctxt->myDoc->intSubset =
5321 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5333 * @ctxt: an HTML parser context
5343 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
5345 if ((ctxt == NULL) || (ctxt->input == NULL)) {
5346 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5350 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5351 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
5352 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5353 int cur = ctxt->input->cur - ctxt->input->base;
5356 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5358 ctxt->errNo = XML_PARSER_EOF;
5359 ctxt->disableSAX = 1;
5362 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5363 ctxt->input->cur = ctxt->input->base + cur;
5364 ctxt->input->end =
5365 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5371 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5372 htmlParseTryOrFinish(ctxt, terminate);
5374 } else if (ctxt->instate != XML_PARSER_EOF) {
5375 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
5376 xmlParserInputBufferPtr in = ctxt->input->buf;
5383 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5390 htmlParseTryOrFinish(ctxt, terminate);
5392 if ((ctxt->instate != XML_PARSER_EOF) &&
5393 (ctxt->instate != XML_PARSER_EPILOG) &&
5394 (ctxt->instate != XML_PARSER_MISC)) {
5395 ctxt->errNo = XML_ERR_DOCUMENT_END;
5396 ctxt->wellFormed = 0;
5398 if (ctxt->instate != XML_PARSER_EOF) {
5399 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5400 ctxt->sax->endDocument(ctxt->userData);
5402 ctxt->instate = XML_PARSER_EOF;
5404 return((xmlParserErrors) ctxt->errNo);
5432 htmlParserCtxtPtr ctxt;
5441 ctxt = htmlNewParserCtxt();
5442 if (ctxt == NULL) {
5447 ctxt->charset=XML_CHAR_ENCODING_UTF8;
5449 if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
5450 xmlFree(ctxt->sax);
5451 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
5452 if (ctxt->sax == NULL) {
5454 xmlFree(ctxt);
5457 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
5459 ctxt->userData = user_data;
5462 ctxt->directory = NULL;
5464 ctxt->directory = xmlParserGetDirectory(filename);
5467 inputStream = htmlNewInputStream(ctxt);
5469 xmlFreeParserCtxt(ctxt);
5485 inputPush(ctxt, inputStream);
5487 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5488 (ctxt->input->buf != NULL)) {
5489 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5490 int cur = ctxt->input->cur - ctxt->input->base;
5492 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5494 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5495 ctxt->input->cur = ctxt->input->base + cur;
5496 ctxt->input->end =
5497 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5502 ctxt->progressive = 1;
5504 return(ctxt);
5526 htmlParserCtxtPtr ctxt;
5533 ctxt = htmlCreateDocParserCtxt(cur, encoding);
5534 if (ctxt == NULL) return(NULL);
5536 if (ctxt->sax != NULL) xmlFree (ctxt->sax);
5537 ctxt->sax = sax;
5538 ctxt->userData = userData;
5541 htmlParseDocument(ctxt);
5542 ret = ctxt->myDoc;
5544 ctxt->sax = NULL;
5545 ctxt->userData = NULL;
5547 htmlFreeParserCtxt(ctxt);
5582 htmlParserCtxtPtr ctxt;
5591 ctxt = htmlNewParserCtxt();
5592 if (ctxt == NULL) {
5602 xmlFreeParserCtxt(ctxt);
5606 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
5609 xmlFreeParserCtxt(ctxt);
5613 inputPush(ctxt, inputStream);
5621 htmlCheckEncoding (ctxt, content);
5626 return(ctxt);
5649 htmlParserCtxtPtr ctxt;
5654 ctxt = htmlCreateFileParserCtxt(filename, encoding);
5655 if (ctxt == NULL) return(NULL);
5657 oldsax = ctxt->sax;
5658 ctxt->sax = sax;
5659 ctxt->userData = userData;
5662 htmlParseDocument(ctxt);
5664 ret = ctxt->myDoc;
5666 ctxt->sax = oldsax;
5667 ctxt->userData = NULL;
5669 htmlFreeParserCtxt(ctxt);
5838 * @ctxt: an HTML parser context
5843 htmlCtxtReset(htmlParserCtxtPtr ctxt)
5848 if (ctxt == NULL)
5852 dict = ctxt->dict;
5854 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
5857 ctxt->inputNr = 0;
5858 ctxt->input = NULL;
5860 ctxt->spaceNr = 0;
5861 if (ctxt->spaceTab != NULL) {
5862 ctxt->spaceTab[0] = -1;
5863 ctxt->space = &ctxt->spaceTab[0];
5865 ctxt->space = NULL;
5869 ctxt->nodeNr = 0;
5870 ctxt->node = NULL;
5872 ctxt->nameNr = 0;
5873 ctxt->name = NULL;
5875 DICT_FREE(ctxt->version);
5876 ctxt->version = NULL;
5877 DICT_FREE(ctxt->encoding);
5878 ctxt->encoding = NULL;
5879 DICT_FREE(ctxt->directory);
5880 ctxt->directory = NULL;
5881 DICT_FREE(ctxt->extSubURI);
5882 ctxt->extSubURI = NULL;
5883 DICT_FREE(ctxt->extSubSystem);
5884 ctxt->extSubSystem = NULL;
5885 if (ctxt->myDoc != NULL)
5886 xmlFreeDoc(ctxt->myDoc);
5887 ctxt->myDoc = NULL;
5889 ctxt->standalone = -1;
5890 ctxt->hasExternalSubset = 0;
5891 ctxt->hasPErefs = 0;
5892 ctxt->html = 1;
5893 ctxt->external = 0;
5894 ctxt->instate = XML_PARSER_START;
5895 ctxt->token = 0;
5897 ctxt->wellFormed = 1;
5898 ctxt->nsWellFormed = 1;
5899 ctxt->valid = 1;
5900 ctxt->vctxt.userData = ctxt;
5901 ctxt->vctxt.error = xmlParserValidityError;
5902 ctxt->vctxt.warning = xmlParserValidityWarning;
5903 ctxt->record_info = 0;
5904 ctxt->nbChars = 0;
5905 ctxt->checkIndex = 0;
5906 ctxt->inSubset = 0;
5907 ctxt->errNo = XML_ERR_OK;
5908 ctxt->depth = 0;
5909 ctxt->charset = XML_CHAR_ENCODING_NONE;
5910 ctxt->catalogs = NULL;
5911 xmlInitNodeInfoSeq(&ctxt->node_seq);
5913 if (ctxt->attsDefault != NULL) {
5914 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
5915 ctxt->attsDefault = NULL;
5917 if (ctxt->attsSpecial != NULL) {
5918 xmlHashFree(ctxt->attsSpecial, NULL);
5919 ctxt->attsSpecial = NULL;
5925 * @ctxt: an HTML parser context
5934 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
5936 if (ctxt == NULL)
5940 ctxt->sax->warning = NULL;
5941 ctxt->vctxt.warning = NULL;
5943 ctxt->options |= XML_PARSE_NOWARNING;
5946 ctxt->sax->error = NULL;
5947 ctxt->vctxt.error = NULL;
5948 ctxt->sax->fatalError = NULL;
5950 ctxt->options |= XML_PARSE_NOERROR;
5953 ctxt->pedantic = 1;
5955 ctxt->options |= XML_PARSE_PEDANTIC;
5957 ctxt->pedantic = 0;
5959 ctxt->keepBlanks = 0;
5960 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
5962 ctxt->options |= XML_PARSE_NOBLANKS;
5964 ctxt->keepBlanks = 1;
5966 ctxt->recovery = 1;
5969 ctxt->recovery = 0;
5971 ctxt->options |= HTML_PARSE_COMPACT;
5974 ctxt->dictNames = 0;
5980 * @ctxt: an HTML parser context
5991 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
5996 htmlCtxtUseOptions(ctxt, options);
5997 ctxt->html = 1;
6003 xmlSwitchToEncoding(ctxt, hdlr);
6004 if (ctxt->input->encoding != NULL)
6005 xmlFree((xmlChar *) ctxt->input->encoding);
6006 ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);
6009 if ((URL != NULL) && (ctxt->input != NULL) &&
6010 (ctxt->input->filename == NULL))
6011 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
6012 htmlParseDocument(ctxt);
6013 ret = ctxt->myDoc;
6014 ctxt->myDoc = NULL;
6016 if ((ctxt->dictNames) &&
6018 (ret->dict == ctxt->dict))
6019 ctxt->dict = NULL;
6020 xmlFreeParserCtxt(ctxt);
6039 htmlParserCtxtPtr ctxt;
6045 ctxt = htmlCreateDocParserCtxt(cur, NULL);
6046 if (ctxt == NULL)
6048 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6064 htmlParserCtxtPtr ctxt;
6067 ctxt = htmlCreateFileParserCtxt(filename, encoding);
6068 if (ctxt == NULL)
6070 return (htmlDoRead(ctxt, NULL, NULL, options, 0));
6088 htmlParserCtxtPtr ctxt;
6091 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6092 if (ctxt == NULL)
6095 if (ctxt->sax != NULL)
6096 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
6097 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6114 htmlParserCtxtPtr ctxt;
6125 ctxt = xmlNewParserCtxt();
6126 if (ctxt == NULL) {
6130 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6133 xmlFreeParserCtxt(ctxt);
6136 inputPush(ctxt, stream);
6137 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6157 htmlParserCtxtPtr ctxt;
6169 ctxt = htmlNewParserCtxt();
6170 if (ctxt == NULL) {
6174 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6177 xmlFreeParserCtxt(ctxt);
6180 inputPush(ctxt, stream);
6181 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6186 * @ctxt: an HTML parser context
6193 * This reuses the existing @ctxt parser context
6198 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
6205 if (ctxt == NULL)
6208 htmlCtxtReset(ctxt);
6210 stream = xmlNewStringInputStream(ctxt, cur);
6214 inputPush(ctxt, stream);
6215 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6220 * @ctxt: an HTML parser context
6226 * This reuses the existing @ctxt parser context
6231 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
6238 if (ctxt == NULL)
6241 htmlCtxtReset(ctxt);
6243 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
6247 inputPush(ctxt, stream);
6248 return (htmlDoRead(ctxt, NULL, encoding, options, 1));
6253 * @ctxt: an HTML parser context
6261 * This reuses the existing @ctxt parser context
6266 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
6272 if (ctxt == NULL)
6277 htmlCtxtReset(ctxt);
6284 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6290 inputPush(ctxt, stream);
6291 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6296 * @ctxt: an HTML parser context
6303 * This reuses the existing @ctxt parser context
6308 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
6316 if (ctxt == NULL)
6319 htmlCtxtReset(ctxt);
6325 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6330 inputPush(ctxt, stream);
6331 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6336 * @ctxt: an HTML parser context
6345 * This reuses the existing @ctxt parser context
6350 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
6360 if (ctxt == NULL)
6363 htmlCtxtReset(ctxt);
6369 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6374 inputPush(ctxt, stream);
6375 return (htmlDoRead(ctxt, URL, encoding, options, 1));