Home | History | Annotate | Download | only in empiricism
      1 <!doctype html>
      2 <html>
      3 <head>
      4 <title>HTML containment</title>
      5 <script>
      6 if (!Date.now) { Date.now = function () { return +new Date; }; }
      7 </script>
      8 <script src="html-containment.js"></script>
      9 <script>
     10 // Extract URL query parameters into options
     11 var opts = {
     12   // use a short list for quick iteration and debugging
     13   shortlist: false,
     14   rerun:     false
     15 };
     16 var cannedData;
     17 (function () {
     18   location.search.replace(
     19       /[?&]([^&=]*)(?:=(?:false|no|([^&]*))(?![^&]))?/ig,
     20       function (_, keyEncoded, valueEncoded) {
     21         var key   = decodeURIComponent(keyEncoded);
     22         var value = valueEncoded == null ? "true"
     23                   : decodeURIComponent(valueEncoded);
     24         opts[key] = value;
     25       });
     26 
     27   if (opts.rerun) {
     28     cannedData = newBlankObject();
     29   } else {
     30     document.write('<script src="canned-data.js"><\/script>');
     31   }
     32 })();
     33 </script>
     34 <script>
     35 // Includes both conforming and obsolete elements from
     36 // http://dev.w3.org/html5/html-author/#index-of-elements
     37 // It does not include foreign content.
     38 var elementNames =
     39   opts.shortlist
     40 ? [
     41   'a', 'font', 'form', 'frameset', 'h1', 'h2', 'iframe',
     42   'img', 'li', 'ol', 'plaintext', 'script', 'select', 'table', 'tbody',
     43   'textarea', 'td', 'tr', 'video', 'xmp'
     44 ]
     45 : [
     46   'a', 'abbr', 'acronym', 'address', 'applet', 'area', 'article', 'aside',
     47   'audio', 'b', 'base', 'basefont', 'bb', 'bdo', 'bgsound', 'big', 'blink',
     48   'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'center', 'cite',
     49   'code', 'col', 'colgroup', 'command', 'datagrid', 'datalist', 'dd', 'del',
     50   'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'embed',
     51   'fieldset', 'figure', 'font', 'footer', 'form', 'frame', 'frameset', 'h1',
     52   'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'i', 'iframe',
     53   'img', 'input', 'ins', 'isindex', 'kbd', 'label', 'legend', 'li', 'link',
     54   'listing', 'map', 'mark', 'marquee', 'menu', 'meta', 'meter', 'nav', 'nobr',
     55   'noembed', 'noframes', 'noscript', 'object', 'ol', 'optgroup', 'option',
     56   'output', 'p', 'param', 'plaintext', 'pre', 'progress', 'q', 'rp', 'rt',
     57   'ruby', 's', 'samp', 'script', 'section', 'select', 'small', 'source',
     58   'spacer', 'span', 'strike', 'strong', 'style', 'sub', 'sup', 'table',
     59   'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr',
     60   'tt', 'u', 'ul', 'var', 'video', 'wbr', 'xmp',
     61 
     62   'xcustom'
     63 ];
     64 </script>
     65 <style>
     66 pre.json  { white-space: pre-wrap }
     67 .json-kw          { color: #800 }
     68 .json-str         { color: #080 }
     69 .json-val         { color: #008 }
     70 .json-sep         { background: white }
     71 .json-ell         { color: blue }  /* ellipses are linky */
     72 
     73 /* Collapse inner blocks except on roll-over. */
     74                             .json-int { display: none }
     75 .json-ext.json-expanded   > .json-int,
     76 .json-ext.json-nocollapse > .json-int { display: inline }
     77 .json-ext.json-nocollapse > .json-ell { display: none }
     78 .json-ext.json-expanded   > .json-ell { color: transparent }
     79 
     80 #experiment-progress-counter:empty { display: none }
     81 #experiment-progress-counter {
     82   width: 25em;
     83   display: block;
     84   list-style-type: none;
     85   -webkit-padding-start: 0;
     86 }
     87 div #experiment-progress-counter:empty {
     88   border-width: 0;  /* border-width accepts widths only, no style or color */
     89   padding: 0;
     90 }
     91 div #experiment-progress-counter {
     92   border:1px solid black;
     93   padding: 0 0 2px 2px;
     94 }
     95 #experiment-progress-counter li {
     96   display: block;
     97   border: 1px solid black;
     98   padding: 2px;
     99   margin-top: 2px;
    100   height: 1em;
    101   background: #ddf;
    102   white-space: nowrap;
    103   font-size:8pt;
    104 }
    105 #experiment-iframes iframe {
    106   visibility:hidden;
    107   width:40em;
    108   height:1em;
    109 }
    110 em { color: #fff; font-weight: bold; background: #800; border: 1px solid #800; padding: 1px }
    111 </style>
    112 </head>
    113 <body>
    114 <p>
    115 This page tries to exhaustively combine tags for all pairings of HTML elements
    116 to answer the following questions about how HTML browsers parse tag soup:</p>
    117 <ul>
    118   <li><a href="#nests-in-body">Which elements can appear directly in the body of an HTML document?</a></li>
    119   <li><a href="#can-contain">Which elements can nest directly in which other elements?</a></li>
    120   <li><a href="#text-content-model">Which elements can contain text content, comments, entities?</a></li>
    121   <li><a href="#containment-stack-json">Which elements can be introduced between the body and an element
    122       to allow it to nest properly?</a></li>
    123   <li>Which elements are implied by which tags? (TODO)</li>
    124   <li><a href="#explicit-closers">Which open tags close which other elements?</a></li>
    125   <li><a href="#closed-by-close">Which close tags close which elements?</a></li>
    126   <li><a href="#closed-by-open">Which open tags close which elements?</a></li>
    127 </ul>
    128 
    129 <p>A <a href="#result-dump">JSON dump</a>
    130    of the results is available at the end once running is done.</p>
    131 
    132 <div><ul id="experiment-progress-counter"></ul></div>
    133 
    134 <p>A few query parameters affect the behavior of this page:</p>
    135 <ul>
    136   <li><a href="?rerun"><tt><span class="basename"></span>?rerun</tt></a> &mdash;
    137       <em style="font-size:66%">&iexcl;VERY SLOW!</em>
    138       Rerun experiments on the browser instead of using the canned results from Chrome.</li>
    139   <li><a href="?rerun&shortlist"><tt><span class="basename"></span>?rerun&amp;shortlist</tt></a> &mdash;
    140       Rerun experiments on the browser instead of using the canned results from Chrome,
    141       but with a short list of elements instead of the full 128+ HTML elements
    142       which speeds debugging.</li>
    143   <li><a href="?"><tt><span class="basename"></span>?</tt></a> &mdash;
    144       Quick browsing of canned results from Chrome.</li>
    145 </ul>
    146 <script>(function () {
    147   var basename = location.pathname.replace(/^[\s\S]*\//, '');
    148   function toCss(s) {
    149     return ('\x22'
    150         + s.replace(/[^\w\-.]/g, function (c) {
    151                       return '\\' + c.charCodeAt(0).toString(16) + ' ';
    152                     })
    153         + '\x22');
    154   }
    155   document.write('<style>.basename:after { content: ' + toCss(basename) + ' }<\/style>');
    156 }());</script>
    157 
    158 
    159 
    160 <!-- Contains iframes that are used to parse HTML since innerHTML parsing differs
    161      from regular parsing in many respects. -->
    162 <div id="experiment-iframes"></div>
    163 
    164 <h2 id="nests-in-body">Nests in body</h2>
    165 <p>Does a tag <tt>&lt;X&gt;</tt> directly inside
    166  <tt>&lt;body&gt;&hellip;&lt;/body&gt;</tt> parse to an element named X
    167  directly inside the document body?</p>
    168 <pre id="nests-in-body-json" class="json"></pre>
    169 <script>
    170 var canAppearInBody = getOwn(cannedData, 'canAppearInBody') || new Promise();
    171 (function () {
    172   // Generates HTML for the experiment.
    173   function nestInBody(elementName) {
    174     return '<' + elementName + '></' + elementName + '>';
    175   }
    176   // Examines the resulting body to fold a single experiment into the result.
    177   function isNestedInBody(elementName, body, result) {
    178     result[elementName] = !!(
    179       body.firstChild && body.firstChild.nodeName.toLowerCase() === elementName
    180     );
    181     return result;
    182   }
    183   // When the experiment is finished, replace the promise so that we can
    184   // kick off experiments that depend on the result of this experiment.
    185   function finish(result) {
    186     var toSatisfy = canAppearInBody;
    187     if (toSatisfy instanceof Promise) {
    188       canAppearInBody = result;
    189       toSatisfy.satisfy();
    190     }
    191     displayJson(result, document.getElementById('nests-in-body-json'))
    192   }
    193   if (canAppearInBody instanceof Promise) {
    194     runExperiment(nestInBody, isNestedInBody, newBlankObject(), finish);
    195   } else {
    196     finish(canAppearInBody);
    197   }
    198 }());
    199 </script>
    200 
    201 <h2 id="can-contain">Containment</h2>
    202 <p>For each element, what elements can contain it?</p>
    203 <p>E.g., <code>canAppearIn['x'].indexOf('y') >= 0</code> when
    204 <code>&lt;x&gt;&lt;y&gt;&lt;/y&gt;&lt;/x&gt;</code> parses to
    205 an element <tt>x</tt> that contains an element <tt>y</tt> when embedded
    206 in an element that can contain <code>&lt;x&gt;</code>.</p>
    207 <h3>Can Contain</h3>
    208 <pre class="json" id="can-contain-json"></pre>
    209 <h3>Can Appear In</h3>
    210 <pre class="json" id="can-appear-in-json"></pre>
    211 <h3>Containment stack</h3>
    212 <pre class="json" id="containment-stack-json"></pre>
    213 <script>
    214 // We use promises to allow experiment chaining where one
    215 // experiment depends on the results of another.
    216 
    217 var canContain = getOwn(cannedData, 'canContain') || new Promise();
    218 var canAppearIn = getOwn(cannedData, 'canAppearIn') || new Promise();
    219 // For a given element name, give a stack of elements that can
    220 // be validly embedded in body that have the element at the top.
    221 var containmentStackFor = new Promise();
    222 
    223 // HTML that opens the elements in the stack, with the body HTML inside
    224 // the top-most (last) element, and then closes them in reverse order.
    225 function tagStackToHtml(stack, body) {
    226   var stackReverse = stack.slice();
    227   stackReverse.reverse();
    228   return (
    229     '<' + stack.join('><') + '>'
    230     + body
    231     + '</' + stackReverse.join('></') + '>'
    232   );
    233 }
    234 
    235 (function () {
    236   var nNeededLast = Infinity;
    237 
    238   // We need a function that tells us which elements we need to have on the
    239   // open element stack so that we can get the outer element on the stack to
    240   // test whether an inner tag leads to an inner element inside it.
    241   // For example, to test whether an <a> tag nests properly in a <td>, we
    242   // need to construct <table><tbody><tr><td><a>.
    243   //
    244   // Knowing what needs to be on the open element stack for <td> requires
    245   // knowing what needs to be on the open element stack for <tr>.
    246   function containmentStackMaker(canAppearIn) {
    247     var memoTable = newBlankObject();
    248     return function (elementName, opt_exclusions) {
    249       var memoKey = opt_exclusions
    250           ? elementName + ' ' + opt_exclusions.join(' / ') : elementName;
    251 
    252       if (getOwn(canAppearInBody, elementName)) { return [elementName]; }
    253       var prior = getOwn(memoKey, elementName, void 0);
    254       if (prior !== void 0) { return prior ? prior.slice() : null; }
    255       var empty = [];
    256 
    257       function end(e) {
    258         return getOwn(canAppearInBody, e, false);
    259       }
    260       function eq (e, f) { return e === f; }
    261       function neighbors(e) {
    262         var neighbors = getOwn(canAppearIn, e, empty);
    263         if (opt_exclusions) {
    264           var exclusions = makeSet(opt_exclusions);
    265           var included = null;
    266           for (var i = 0, n = neighbors.length; i < n; ++i) {
    267             var neighbor = neighbors[i];
    268             if (inSet(exclusions, neighbor)) {
    269               if (!included) { included = neighbors.slice(0, i); }
    270             } else if (included) {
    271               included.push(neighbor);
    272             }
    273           }
    274           if (included) { neighbors = included; }
    275         }
    276         return neighbors;
    277       }
    278       var result = breadthFirstSearch(elementName, end, eq, neighbors) || null;
    279       memoTable[memoKey] = result;
    280       return result ? result.slice() : null;
    281     };
    282   }
    283 
    284   function run(result) {
    285 
    286     function makeContainerHtmlString(outer, inner) {
    287       if (neededSet[outer] !== neededSet) { return null; }
    288       // We try to assemble a stack of elements that can contain outer before
    289       // checking whether it can contain inner.
    290       // If we cannot, we punt so that we can retry later after we've fleshed
    291       // out more of canAppearIn.
    292       var stack = containmentStack(outer);
    293       if (!stack) { return null; }
    294       stack.push(inner);
    295       return tagStackToHtml(stack, '');
    296     }
    297 
    298     function checkCanContain(outer, inner, body, canContain) {
    299       var outerEls = body.getElementsByTagName(outer);
    300       if (outerEls.length) {
    301         var containees = getOwn(canContain, outer) || [];
    302         canContain[outer] = containees;
    303         var outerEl = outerEls[0];
    304         var firstChild = outerEl.firstChild;
    305         if (((firstChild && firstChild.nodeName.toLowerCase() === inner)
    306              || outerEl.getElementsByTagName(inner).length)
    307             && containees.indexOf(inner) < 0) {
    308           containees.push(inner);
    309         }
    310       }
    311       return canContain;
    312     }
    313 
    314     var elementNamesNeeded = [];
    315     for (var i = 0, n = elementNames.length; i < n; ++i) {
    316       var elementName = elementNames[i];
    317       if (!Object.hasOwnProperty.call(result, elementName)) {
    318         elementNamesNeeded.push(elementName);
    319       }
    320     }
    321     console.log('nNeededLast=%s, nNeeded=%d, result=%o',
    322                 nNeededLast, elementNamesNeeded.length, result);
    323     if (elementNamesNeeded.length === nNeededLast) {
    324       // We made no progress last run.
    325       console.log('cannot place ' + elementNamesNeeded);
    326       elementNamesNeeded.length = 0;
    327     }
    328 
    329     var containmentStack = containmentStackMaker(reverseMultiMap(result));
    330 
    331     var neededSet = newBlankObject();
    332     for (var i = elementNamesNeeded.length; --i >= 0;) {
    333       neededSet[elementNamesNeeded[i]] = neededSet;
    334     }
    335 
    336     if (elementNamesNeeded.length) {
    337       nNeededLast = elementNamesNeeded.length;
    338       return runExperiment(
    339           makeContainerHtmlString, checkCanContain, result, run,
    340           elementNames);
    341     } else {
    342       finishCanContain(result);
    343       return result;
    344     }
    345   }
    346 
    347   function finishCanContain(result) {
    348     var toSatisfy = canContain;
    349     if (toSatisfy instanceof Promise) {
    350       canContain = sortedMultiMap(result);
    351       toSatisfy.satisfy();
    352     }
    353     displayJson(canContain, document.getElementById('can-contain-json'));
    354   }
    355 
    356   if (canContain instanceof Promise) {
    357     when(function () { run(newBlankObject()); }, canAppearInBody);
    358   } else {
    359     finishCanContain(canContain);
    360   }
    361 
    362   function reverseMap() {
    363     var toSatisfy = canAppearIn;
    364     if (toSatisfy instanceof Promise) {
    365       canAppearIn = sortedMultiMap(reverseMultiMap(canContain));
    366       toSatisfy.satisfy();
    367     }
    368     displayJson(canAppearIn, document.getElementById('can-appear-in-json'));
    369     toSatisfy = containmentStackFor;
    370 
    371     containmentStackFor = containmentStackMaker(canAppearIn);
    372     toSatisfy.satisfy();
    373   }
    374 
    375   when(function () { reverseMap(); }, canContain);
    376 
    377   function mapStacks() {
    378     var containmentStackMap = newBlankObject();
    379     for (var i = 0, n = elementNames.length; i < n; ++i) {
    380       var elementName = elementNames[i];
    381       var stack = containmentStackFor(elementName);
    382       if (stack) { --stack.length; }
    383       containmentStackMap[elementName] = stack;
    384     }
    385     displayJson(containmentStackMap,
    386                 document.getElementById('containment-stack-json'));
    387   }
    388   when(mapStacks, containmentStackFor);
    389 }());
    390 
    391 
    392 

Text and comment content

393 394

Tests which elements can contain a non-whitespace text node and which can contain comments or other non-text elements as a result of parsing.

396

textContentModel['x'].text is true when &lt;x&gt;text&lt;/x&gt; parses to an X element containing a text node.

399

textContentModel['x'].comments is true when &lt;x&gt;&lt;!--comment--&gt;&lt;/x&gt; parses to an X element containing a comment node.

402

textContentModel['x'].xml is true when &lt;x&gt;&amp;amp;;&lt;/x&gt; parses to an X element that contains text nodes that normalize to &&amp;.

405

textContentModel['x'].raw is true when &lt;x&gt;&lt;br&gt;&lt;/x&gt; parses to an X element containing a text node.

408

textContentModel['x'].entities is true when &lt;x&gt;&amp;amp;;&lt;/x&gt; parses to an X element containing a text node &amp;amp;.

411

    412