Home | History | Annotate | Download | only in tests
      1 from __future__ import absolute_import, division, unicode_literals
      2 
      3 import os
      4 import sys
      5 import traceback
      6 import warnings
      7 import re
      8 
      9 warnings.simplefilter("error")
     10 
     11 from .support import get_data_files
     12 from .support import TestData, convert, convertExpected, treeTypes
     13 from html5lib import html5parser, constants
     14 
# Set to True to also check the number of parse errors against the expected count
     16 checkParseErrors = False
     17 
     18 # XXX - There should just be one function here but for some reason the testcase
     19 # format differs from the treedump format by a single space character
     20 
     21 
     22 def convertTreeDump(data):
     23     return "\n".join(convert(3)(data).split("\n")[1:])
     24 
     25 namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
     26 
     27 
     28 def runParserTest(innerHTML, input, expected, errors, treeClass,
     29                   namespaceHTMLElements):
     30     with warnings.catch_warnings(record=True) as caughtWarnings:
     31         warnings.simplefilter("always")
     32         p = html5parser.HTMLParser(tree=treeClass,
     33                                    namespaceHTMLElements=namespaceHTMLElements)
     34 
     35         try:
     36             if innerHTML:
     37                 document = p.parseFragment(input, innerHTML)
     38             else:
     39                 document = p.parse(input)
     40         except:
     41             errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
     42                                   "\nTraceback:", traceback.format_exc()])
     43             assert False, errorMsg
     44 
     45     otherWarnings = [x for x in caughtWarnings
     46                      if not issubclass(x.category, constants.DataLossWarning)]
     47     assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings]
     48     if len(caughtWarnings):
     49         return
     50 
     51     output = convertTreeDump(p.tree.testSerializer(document))
     52 
     53     expected = convertExpected(expected)
     54     if namespaceHTMLElements:
     55         expected = namespaceExpected(r"\1<html \2>", expected)
     56 
     57     errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
     58                           "\nReceived:", output])
     59     assert expected == output, errorMsg
     60 
     61     errStr = []
     62     for (line, col), errorcode, datavars in p.errors:
     63         assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
     64         errStr.append("Line: %i Col: %i %s" % (line, col,
     65                                                constants.E[errorcode] % datavars))
     66 
     67     errorMsg2 = "\n".join(["\n\nInput:", input,
     68                            "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
     69                            "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
     70     if checkParseErrors:
     71             assert len(p.errors) == len(errors), errorMsg2
     72 
     73 
     74 def test_parser():
     75     sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n")
     76     files = get_data_files('tree-construction')
     77 
     78     for filename in files:
     79         testName = os.path.basename(filename).replace(".dat", "")
     80         if testName in ("template",):
     81             continue
     82 
     83         tests = TestData(filename, "data")
     84 
     85         for index, test in enumerate(tests):
     86             input, errors, innerHTML, expected = [test[key] for key in
     87                                                   ('data', 'errors',
     88                                                    'document-fragment',
     89                                                    'document')]
     90             if errors:
     91                 errors = errors.split("\n")
     92 
     93             for treeName, treeCls in treeTypes.items():
     94                 for namespaceHTMLElements in (True, False):
     95                     yield (runParserTest, innerHTML, input, expected, errors, treeCls,
     96                            namespaceHTMLElements)
     97