<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>HTML containment</title>
<script>
// Fallback for engines that lack Date.now.
if (!Date.now) { Date.now = function () { return +new Date; }; }
</script>
<script src="html-containment.js"></script>
<script>
// Extract URL query parameters into options
var opts = {
  // use a short list for quick iteration and debugging
  shortlist: false,
  rerun: false
};
var cannedData;
(function () {
  // Each "key", "key=value" pair in the query string becomes an entry in opts.
  //   ?key          -> opts.key = "true"
  //   ?key=value    -> opts.key = decoded value
  //   ?key=false|no -> opts.key = false
  // The false/no alternative has its own capture group so that it can be told
  // apart from a bare key; previously both produced "true".
  location.search.replace(
      /[?&]([^&=]*)(?:=(?:(false|no)|([^&]*))(?![^&]))?/ig,
      function (_, keyEncoded, falsey, valueEncoded) {
        var key = decodeURIComponent(keyEncoded);
        if (falsey) {
          opts[key] = false;
        } else {
          opts[key] = valueEncoded == null
              ? "true"
              : decodeURIComponent(valueEncoded);
        }
      });

  if (opts.rerun) {
    // Start from scratch so that every experiment below actually runs.
    cannedData = newBlankObject();
  } else {
    // canned-data.js is expected to define cannedData -- TODO confirm.
    document.write('<script src="canned-data.js"><\/script>');
  }
})();
</script>
<script>
// Includes both conforming and obsolete elements from
// http://dev.w3.org/html5/html-author/#index-of-elements
// It does not include foreign content.
var elementNames =
  opts.shortlist
  ? [
    'a', 'font', 'form', 'frameset', 'h1', 'h2', 'iframe',
    'img', 'li', 'ol', 'plaintext', 'script', 'select', 'table', 'tbody',
    'textarea', 'td', 'tr', 'video', 'xmp'
  ]
  : [
    'a', 'abbr', 'acronym', 'address', 'applet', 'area', 'article', 'aside',
    'audio', 'b', 'base', 'basefont', 'bb', 'bdo', 'bgsound', 'big', 'blink',
    'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'center', 'cite',
    'code', 'col', 'colgroup', 'command', 'datagrid', 'datalist', 'dd', 'del',
    'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'embed',
    'fieldset', 'figure', 'font', 'footer', 'form', 'frame', 'frameset', 'h1',
    'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'i', 'iframe',
    'img', 'input', 'ins', 'isindex', 'kbd', 'label', 'legend', 'li', 'link',
    'listing', 'map', 'mark', 'marquee', 'menu', 'meta', 'meter', 'nav', 'nobr',
    'noembed', 'noframes', 'noscript', 'object', 'ol', 'optgroup', 'option',
    'output', 'p', 'param', 'plaintext', 'pre', 'progress', 'q', 'rp', 'rt',
    'ruby', 's', 'samp', 'script', 'section', 'select', 'small', 'source',
    'spacer', 'span', 'strike', 'strong', 'style', 'sub', 'sup', 'table',
    'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr',
    'tt', 'u', 'ul', 'var', 'video', 'wbr', 'xmp',

    'xcustom'
  ];
</script>
<style>
pre.json { white-space: pre-wrap }
.json-kw { color: #800 }
.json-str { color: #080 }
.json-val { color: #008 }
.json-sep { background: white }
.json-ell { color: blue }  /* ellipses are linky */

/* Collapse inner blocks except on roll-over. */
.json-int { display: none }
.json-ext.json-expanded > .json-int,
.json-ext.json-nocollapse > .json-int { display: inline }
.json-ext.json-nocollapse > .json-ell { display: none }
.json-ext.json-expanded > .json-ell { color: transparent }

#experiment-progress-counter:empty { display: none }
#experiment-progress-counter {
  width: 25em;
  display: block;
  list-style-type: none;
  -webkit-padding-start: 0;
}
div #experiment-progress-counter:empty {
  /* border-width accepts widths only; the style/color belong to border. */
  border-width: 0;
  padding: 0 0 0 0;
}
div #experiment-progress-counter {
  border:1px solid black;
  padding: 0 0 2px 2px;
}
#experiment-progress-counter li {
  display: block;
  border: 1px solid black;
  padding: 2px;
  margin-top: 2px;
  height: 1em;
  background: #ddf;
  white-space: nowrap;
  font-size:8pt;
}
#experiment-iframes iframe {
  visibility:hidden;
  width:40em;
  height:1em;
}
em { color: #fff; font-weight: bold; background: #800; border: 1px solid #800; padding: 1px }
</style>
</head>
<body>
<p>
This page tries to exhaustively combine tags for all pairings of HTML elements
to answer the following questions about how HTML browsers parse tag soup:</p>
<ul>
  <li><a href="#nests-in-body">Which elements can appear directly in the body of an HTML document?</a></li>
  <li><a href="#can-contain">Which elements can nest directly in which other elements?</a></li>
  <li><a href="#text-content-model">Which elements can contain text content, comments, entities?</a></li>
  <li><a href="#containment-stack-json">Which elements can be introduced between the body and an element
    to allow it to nest properly?</a></li>
  <li>Which elements are implied by which tags?  (TODO)</li>
  <li><a href="#explicit-closers">Which open tags close which other elements?</a></li>
  <li><a href="#closed-by-close">Which close tags close which elements?</a></li>
  <li><a href="#closed-by-open">Which open tags close which elements?</a></li>
</ul>

<p>A <a href="#result-dump">JSON dump</a>
of the results is available at the end once running is done.</p>

<!-- Keep the list free of whitespace so that the :empty rules apply. -->
<div><ul id="experiment-progress-counter"></ul></div>

<p>A few query parameters affect the behavior of this page:</p>
<ul>
  <li><a href="?rerun"><tt><span class="basename"></span>?rerun</tt></a> —
    <em style="font-size:66%">¡VERY SLOW!</em>
    Rerun experiments on the browser instead of using the canned results from Chrome.</li>
  <li><a href="?rerun&amp;shortlist"><tt><span class="basename"></span>?rerun&amp;shortlist</tt></a> —
    Rerun experiments on the browser instead of using the canned results from Chrome,
    but with a short list of elements instead of the full 128+ HTML elements
    which speeds debugging.</li>
  <li><a href="?"><tt><span class="basename"></span>?</tt></a> —
    Quick browsing of canned results from Chrome.</li>
</ul>
<script>(function () {
  // Show the page's own file name in front of each sample query string by
  // generating a CSS rule whose content is the escaped basename.
  var basename = location.pathname.replace(/^[\s\S]*\//, '');
  // Quotes s as a CSS string, hex-escaping everything but word chars, - and .
  function toCss(s) {
    return ('\x22'
            + s.replace(/[^\w\-.]/g, function (c) {
                return '\\' + c.charCodeAt(0).toString(16) + ' ';
              })
            + '\x22');
  }
  document.write('<style>.basename:after { content: ' + toCss(basename) + ' }<\/style>');
}());</script>



<!-- Contains iframes that are used to parse HTML since innerHTML parsing differs
     from regular parsing in many respects. -->
<div id="experiment-iframes"></div>

<h2 id="nests-in-body">Nests in body</h2>
<p>Does a tag <tt>&lt;X&gt;</tt> directly inside
<tt>&lt;body&gt;…&lt;/body&gt;</tt> parse to an element named X
directly inside the document body?</p>
<pre id="nests-in-body-json" class="json"></pre>
<script>
// Either the canned result or a promise that is satisfied once the
// experiment below has produced the result.
var canAppearInBody = getOwn(cannedData, 'canAppearInBody') || new Promise();
(function () {
  // Generates HTML for the experiment.
  function nestInBody(elementName) {
    return '<' + elementName + '></' + elementName + '>';
  }
  // Examines the resulting body to fold a single experiment into the result.
  function isNestedInBody(elementName, body, result) {
    result[elementName] = !!(
        body.firstChild && body.firstChild.nodeName.toLowerCase() === elementName
    );
    return result;
  }
  // When the experiment is finished, replace the promise so that we can
  // kick off experiments that depend on the result of this experiment.
  function finish(result) {
    var toSatisfy = canAppearInBody;
    if (toSatisfy instanceof Promise) {
      canAppearInBody = result;
      toSatisfy.satisfy();
    }
    displayJson(result, document.getElementById('nests-in-body-json'));
  }
  if (canAppearInBody instanceof Promise) {
    runExperiment(nestInBody, isNestedInBody, newBlankObject(), finish);
  } else {
    finish(canAppearInBody);
  }
}());
</script>

<h2 id="can-contain">Containment</h2>
<p>For each element, what elements can contain it?</p>
<p>E.g., <code>canAppearIn['x'].indexOf('y') &gt;= 0</code> when
<code>&lt;x&gt;&lt;y&gt;&lt;/y&gt;&lt;/x&gt;</code> parses to
an element <tt>x</tt> that contains an element <tt>y</tt> when embedded
in an element that can contain <code>&lt;x&gt;</code>.</p>
<h3>Can Contain</h3>
<pre class="json" id="can-contain-json"></pre>
<h3>Can Appear In</h3>
<pre class="json" id="can-appear-in-json"></pre>
<h3>Containment stack</h3>
<pre class="json" id="containment-stack-json"></pre>
<script>
// We use promises to allow experiment chaining where one
// experiment depends on the results of another.

var canContain = getOwn(cannedData, 'canContain') || new Promise();
var canAppearIn = getOwn(cannedData, 'canAppearIn') || new Promise();
// For a given element name, give a stack of elements that can
// be validly embedded in body that have the element at the top.
var containmentStackFor = new Promise();

// HTML that opens every element in the stack (outermost first), places the
// body HTML inside the top-most element, and then closes the elements in
// reverse order.
function tagStackToHtml(stack, body) {
  var stackReverse = stack.slice();
  stackReverse.reverse();
  return (
      '<' + stack.join('><') + '>'
      + body
      + '</' + stackReverse.join('></') + '>'
  );
}

(function () {
  // Number of unplaced elements seen by the previous call to run(); used to
  // detect a pass that made no progress.
  var nNeededLast = Infinity;

  // We need a function that tells us which elements we need to have on the
  // open element stack so that we can get the outer element on the stack to
  // test whether an inner tag leads to an inner element inside it.
  // For example, to test whether an <a> tag nests properly in a <td>, we
  // need to construct <table><tbody><tr><td><a>.
  //
  // Knowing what needs to be on the open element stack for <td> requires
  // knowing what needs to be on the open element stack for <tr>.
  //
  // Returns a function (elementName, opt_exclusions) that yields a fresh
  // array of element names, or null when no stack was found.
  function containmentStackMaker(canAppearIn) {
    var memoTable = newBlankObject();
    return function (elementName, opt_exclusions) {
      var memoKey = opt_exclusions
          ? elementName + ' ' + opt_exclusions.join(' / ') : elementName;

      if (getOwn(canAppearInBody, elementName)) { return [elementName]; }
      // Look the key up in the memo table.  null is a memoized "no stack",
      // so only undefined means "not computed yet".
      var prior = getOwn(memoTable, memoKey, void 0);
      if (prior !== void 0) { return prior ? prior.slice() : null; }
      var empty = [];

      // The search ends at any element that can sit directly in the body.
      function end(e) {
        return getOwn(canAppearInBody, e, false);
      }
      function eq(e, f) { return e === f; }
      // The possible containers of e, minus any excluded element names.
      function neighbors(e) {
        var candidates = getOwn(canAppearIn, e, empty);
        if (opt_exclusions) {
          var exclusions = makeSet(opt_exclusions);
          // Copy lazily: included stays null until the first exclusion is
          // found so that the common case does not allocate.
          var included = null;
          for (var i = 0, n = candidates.length; i < n; ++i) {
            var candidate = candidates[i];
            if (inSet(exclusions, candidate)) {
              if (!included) { included = candidates.slice(0, i); }
            } else if (included) {
              included.push(candidate);
            }
          }
          if (included) { candidates = included; }
        }
        return candidates;
      }
      var result = breadthFirstSearch(elementName, end, eq, neighbors) || null;
      memoTable[memoKey] = result;
      // Hand out a copy since callers mutate the returned stack.
      return result ? result.slice() : null;
    };
  }

  // Runs one pass of the containment experiment over the elements that do
  // not yet have an entry in result, and schedules itself as the completion
  // callback until every element is placed or a pass makes no progress.
  function run(result) {

    function makeContainerHtmlString(outer, inner) {
      // Only elements still marked as needed are tested in this pass.
      if (neededSet[outer] !== neededSet) { return null; }
      // We try to assemble a stack of elements that can contain outer before
      // checking whether it can contain inner.
      // If we cannot, we punt so that we can retry later after we've fleshed
      // out more of canAppearIn.
      var stack = containmentStack(outer);
      if (!stack) { return null; }
      stack.push(inner);
      return tagStackToHtml(stack, '');
    }

    // Records inner as containable by outer when the parsed body has an
    // outer element with an inner element inside it.
    function checkCanContain(outer, inner, body, canContain) {
      var outerEls = body.getElementsByTagName(outer);
      if (outerEls.length) {
        var containees = getOwn(canContain, outer) || [];
        canContain[outer] = containees;
        var outerEl = outerEls[0];
        var firstChild = outerEl.firstChild;
        if (((firstChild && firstChild.nodeName.toLowerCase() === inner)
             || outerEl.getElementsByTagName(inner).length)
            && containees.indexOf(inner) < 0) {
          containees.push(inner);
        }
      }
      return canContain;
    }

    var elementNamesNeeded = [];
    for (var i = 0, n = elementNames.length; i < n; ++i) {
      var elementName = elementNames[i];
      if (!Object.hasOwnProperty.call(result, elementName)) {
        elementNamesNeeded.push(elementName);
      }
    }
    console.log('nNeededLast=%s, nNeeded=%d, result=%o',
                nNeededLast, elementNamesNeeded.length, result);
    if (elementNamesNeeded.length === nNeededLast) {
      // We made no progress last run.
      console.log('cannot place ' + elementNamesNeeded);
      elementNamesNeeded.length = 0;
    }

    var containmentStack = containmentStackMaker(reverseMultiMap(result));

    // neededSet maps each needed name to the set object itself, which is
    // the marker that makeContainerHtmlString checks for.
    var neededSet = newBlankObject();
    for (var i = elementNamesNeeded.length; --i >= 0;) {
      neededSet[elementNamesNeeded[i]] = neededSet;
    }

    if (elementNamesNeeded.length) {
      nNeededLast = elementNamesNeeded.length;
      return runExperiment(
          makeContainerHtmlString, checkCanContain, result, run,
          elementNames);
    } else {
      finishCanContain(result);
      return result;
    }
  }

  // Publishes the containment map, satisfying the promise if one is pending.
  function finishCanContain(result) {
    var toSatisfy = canContain;
    if (toSatisfy instanceof Promise) {
      canContain = sortedMultiMap(result);
      toSatisfy.satisfy();
    }
    displayJson(canContain, document.getElementById('can-contain-json'));
  }

  if (canContain instanceof Promise) {
    when(function () { run(newBlankObject()); }, canAppearInBody);
  } else {
    finishCanContain(canContain);
  }

  // Derives canAppearIn from canContain when it was not canned, then
  // replaces the containmentStackFor promise with the real function.
  function reverseMap() {
    var toSatisfy = canAppearIn;
    if (toSatisfy instanceof Promise) {
      canAppearIn = sortedMultiMap(reverseMultiMap(canContain));
      toSatisfy.satisfy();
    }
    displayJson(canAppearIn, document.getElementById('can-appear-in-json'));
    toSatisfy = containmentStackFor;

    containmentStackFor = containmentStackMaker(canAppearIn);
    toSatisfy.satisfy();
  }

  when(function () { reverseMap(); }, canContain);

  // Displays, for every element, the stack of containers needed around it.
  function mapStacks() {
    var containmentStackMap = newBlankObject();
    for (var i = 0, n = elementNames.length; i < n; ++i) {
      var elementName = elementNames[i];
      var stack = containmentStackFor(elementName);
      // Drop the element itself so that only its containers remain.
      if (stack) { --stack.length; }
      containmentStackMap[elementName] = stack;
    }
    displayJson(containmentStackMap,
                document.getElementById('containment-stack-json'));
  }
  when(mapStacks, containmentStackFor);
}());
</script>

<h2 id="text-content-model">Text and comment content</h2>
<p>Tests which elements can contain a non-whitespace text node and which can
contain comments or other non-text elements as a result of parsing.</p>
<p><code>textContentModel['x'].text</code> is true when
<code>&lt;x&gt;text&lt;/x&gt;</code> parses to an X element containing
a text node.</p>
<p><code>textContentModel['x'].comments</code> is true when
<code>&lt;x&gt;&lt;!--comment--&gt;&lt;/x&gt;</code> parses to an X element
containing a comment node.</p>
<p><code>textContentModel['x'].xml</code> is true when
<code>&lt;x&gt;&amp;amp;;&lt;/x&gt;</code> parses to an X
element containing text nodes that normalize to <code>&amp;&amp;</code>.</p>
<p><code>textContentModel['x'].raw</code> is true when
<code>&lt;x&gt;&lt;br&gt;&lt;/x&gt;</code> parses to an X element
containing a text node.</p>
<p><code>textContentModel['x'].entities</code> is true when
<code>&lt;x&gt;&amp;amp;;&lt;/x&gt;</code> parses to an X element
containing a text node <code>&amp;amp;</code>.</p>