Home | History | Annotate | Download | only in docs
      1 // Copyright (C) 2006 Google Inc.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 
     16 /**
     17  * @fileoverview
     18  * some functions for browser-side pretty printing of code contained in html.
     19  *
     20  * The lexer should work on a number of languages including C and friends,
     21  * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles.
     22  * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but,
     23  * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or
     24  * CAML-like languages.
     25  *
     26  * If there's a language not mentioned here, then I don't know it, and don't
     27  * know whether it works.  If it has a C-like, Bash-like, or XML-like syntax
     28  * then it should work passably.
     29  *
     30  * Usage:
     31  * 1) include this source file in an html page via
     32  * <script type="text/javascript" src="/path/to/prettify.js"></script>
     33  * 2) define style rules.  See the example page for examples.
     34  * 3) mark the <pre> and <code> tags in your source with class=prettyprint.
     35  *    You can also use the (html deprecated) <xmp> tag, but the pretty printer
     36  *    needs to do more substantial DOM manipulations to support that, so some
     37  *    css styles may not be preserved.
     38  * That's it.  I wanted to keep the API as simple as possible, so there's no
     39  * need to specify which language the code is in.
     40  *
     41  * Change log:
     42  * cbeust, 2006/08/22
     43  *   Java annotations (start with "@") are now captured as literals ("lit")
     44  */
     45 
     46 // JSLint declarations
     47 /*global console, document, navigator, setTimeout, window */
     48 
     /**
      * Controls whether {@code prettyPrint} splits its work across multiple
      * timeouts so that it does not interfere with UI events.
      * When set to {@code false}, {@code prettyPrint()} is synchronous.
      */
     var PR_SHOULD_USE_CONTINUATION = true;

     /** Number of character columns between successive tab stops. */
     var PR_TAB_WIDTH = 8;

     /**
      * Walks the DOM returning a properly escaped version of innerHTML.
      * Declared here without a value.
      * @param {Node} node
      * @param {Array.<string>} out output buffer that receives chunks of HTML.
      */
     var PR_normalizedHtml;

     /**
      * Contains functions for creating and registering new language handlers.
      * Declared here without a value.
      * @type {Object}
      */
     var PR;

     /**
      * Pretty prints a chunk of code.  Declared here without a value.
      *
      * @param {string} sourceCodeHtml code as html
      * @return {string} code as html, but prettier
      */
     var prettyPrintOne;

     /**
      * Finds all the < pre > and < code > tags in the DOM with class=prettyprint
      * and prettifies them.  Declared here without a value.
      * @param {Function} opt_whenDone if specified, called when the last entry
      *     has been finished.
      */
     var prettyPrint;
     82 
     /**
      * Browser detection: reports whether the user agent string identifies
      * Internet Explorer 6.
      *
      * The answer is computed on the first call and then cached by replacing this
      * function with one that returns the remembered value.
      *
      * @return {boolean} true iff navigator.userAgent contains "MSIE 6.".  Always
      *     a real boolean, and false when no usable navigator object exists.
      * @extern
      */
     function _pr_isIE6() {
       // typeof is used so that an environment with no navigator binding at all
       // does not raise a ReferenceError.
       var isIE6 = typeof navigator !== 'undefined' && !!navigator &&
           typeof navigator.userAgent === 'string' &&
           /\bMSIE 6\./.test(navigator.userAgent);
       _pr_isIE6 = function () { return isIE6; };
       return isIE6;
     }
     90 
     91 
     92 (function () {
     /**
      * Builds a lookup table from a list of words separated by single spaces.
      * @param {string} words the space separated words.
      * @return {Object} an object with one key per non-empty word.  Every key
      *     maps to null, so membership must be tested by key presence rather than
      *     by truthiness of the value.
      */
     function wordSet(words) {
       var parts = words.split(/ /g);
       var lookup = {};
       // Walk from the end so keys are added in the same order as before.
       var idx = parts.length;
       while (idx-- > 0) {
         var word = parts[idx];
         if (word) { lookup[word] = null; }
       }
       return lookup;
     }
    105 
    // Keyword lists for various languages.  Each list is a string of words in
    // which every word is followed by exactly one space, so that lists can be
    // concatenated directly.

    // Flow control words shared by most of the lists below.
    var FLOW_CONTROL_KEYWORDS =
        "break continue do else for if return while ";

    // C.
    var C_KEYWORDS =
        FLOW_CONTROL_KEYWORDS +
        "auto case char const default " +
        "double enum extern float goto int long register short signed sizeof " +
        "static struct switch typedef union unsigned void volatile ";

    // Words common to the C++, Java and JavaScript lists.
    var COMMON_KEYWORDS =
        C_KEYWORDS +
        "catch class delete false import " +
        "new operator private protected public this throw true try ";

    // C++.
    var CPP_KEYWORDS =
        COMMON_KEYWORDS +
        "alignof align_union asm axiom bool " +
        "concept concept_map const_cast constexpr decltype " +
        "dynamic_cast explicit export friend inline late_check " +
        "mutable namespace nullptr reinterpret_cast static_assert static_cast " +
        "template typeid typename typeof using virtual wchar_t where ";

    // Java.
    var JAVA_KEYWORDS =
        COMMON_KEYWORDS +
        "boolean byte extends final finally implements import instanceof null " +
        "native package strictfp super synchronized throws transient ";

    // C#, built on top of the Java list.
    var CSHARP_KEYWORDS =
        JAVA_KEYWORDS +
        "as base by checked decimal delegate descending event " +
        "fixed foreach from group implicit in interface internal into is lock " +
        "object out override orderby params readonly ref sbyte sealed " +
        "stackalloc string select uint ulong unchecked unsafe ushort var ";

    // JavaScript / JScript.
    var JSCRIPT_KEYWORDS =
        COMMON_KEYWORDS +
        "debugger eval export function get null set undefined var with " +
        "Infinity NaN ";

    // Perl.
    var PERL_KEYWORDS =
        "caller delete die do dump elsif eval exit foreach for " +
        "goto if import last local my next no our print package redo require " +
        "sub undef unless until use wantarray while BEGIN END ";

    // Python.
    var PYTHON_KEYWORDS =
        FLOW_CONTROL_KEYWORDS +
        "and as assert class def del " +
        "elif except exec finally from global import in is lambda " +
        "nonlocal not or pass print raise try with yield " +
        "False True None ";

    // Ruby.
    var RUBY_KEYWORDS =
        FLOW_CONTROL_KEYWORDS +
        "alias and begin case class def" +
        " defined elsif end ensure false in module next nil not or redo rescue " +
        "retry self super then true undef unless until when yield BEGIN END ";

    // Shell.
    var SH_KEYWORDS =
        FLOW_CONTROL_KEYWORDS +
        "case done elif esac eval fi " +
        "function in local set then until ";

    // Union of every list above (duplicates are not removed).
    var ALL_KEYWORDS = (
        CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS +
        PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS);
    145 
    // Token style names.  Each value is the css class used for that kind of
    // token.
    var
        /** token style for a string literal */
        PR_STRING = 'str',
        /** token style for a keyword */
        PR_KEYWORD = 'kwd',
        /** token style for a comment */
        PR_COMMENT = 'com',
        /** token style for a type */
        PR_TYPE = 'typ',
        /** token style for a literal value.  e.g. 1, null, true. */
        PR_LITERAL = 'lit',
        /** token style for a punctuation string. */
        PR_PUNCTUATION = 'pun',
        /** token style for plain text that has no more specific style. */
        PR_PLAIN = 'pln',

        /** token style for an sgml tag. */
        PR_TAG = 'tag',
        /** token style for a markup declaration such as a DOCTYPE. */
        PR_DECLARATION = 'dec',
        /** token style for embedded source. */
        PR_SOURCE = 'src',
        /** token style for an sgml attribute name. */
        PR_ATTRIB_NAME = 'atn',
        /** token style for an sgml attribute value. */
        PR_ATTRIB_VALUE = 'atv',

        /**
         * A class that indicates a section of markup that is not code, e.g. to
         * allow embedding of line numbers within code listings.
         */
        PR_NOCODE = 'nocode';
    178 
    /**
     * Tests whether a value sorts within the ASCII letter ranges a-z or A-Z.
     * @param {string} ch normally a single character.
     * @return {boolean}
     */
    function isWordChar(ch) {
      var inLowerRange = ch >= 'a' && ch <= 'z';
      if (inLowerRange) { return true; }
      var inUpperRange = ch >= 'A' && ch <= 'Z';
      return inUpperRange;
    }
    182 
    /**
     * Replaces a run of elements of one array with the elements of another.
     * Equivalent to the python <code>
     * container[containerPosition:containerPosition + countReplaced] = inserted
     * </code>
     * @param {Array} inserted the elements to insert.  Unchanged on return.
     * @param {Array} container modified in place
     * @param {Number} containerPosition index in container where the replaced
     *     run starts.
     * @param {Number} countReplaced number of elements of container to remove.
     *     Treated as 0 when omitted.
     */
    function spliceArrayInto(
        inserted, container, containerPosition, countReplaced) {
      // Array.prototype.splice takes (start, deleteCount, item...), so prefix
      // the items with the two leading arguments and apply.
      var spliceArgs = [containerPosition, countReplaced || 0].concat(inserted);
      container.splice.apply(container, spliceArgs);
    }
    201 
    /**
     * Matches text that ends with a token which can precede a regular
     * expression literal in javascript, optionally followed by whitespace.  It
     * also matches empty or all-whitespace text.
     * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
     * list, but ones that might be problematic when seen in languages that don't
     * support regular expression literals have been removed.
     *
     * <p>Specifically, keywords that can't precede a regexp literal in a
     * syntactically legal javascript program are left out, as is the "in"
     * keyword since it's not a keyword in many languages, and might be used as a
     * count of inches.
     *
     * <p>CAVEAT: this does not properly handle the case where a regular
     * expression immediately follows another since a regular expression may have
     * flags for case-sensitivity and the like.  Having regexp tokens adjacent is
     * not valid in any language the original author was aware of, so that case
     * is not handled.
     * TODO: maybe style special characters inside a regexp as punctuation.
     * @private
     */
    var REGEXP_PRECEDER_PATTERN = function () {
      var preceders = [
          "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
          "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
          "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
          "<", "<<", "<<=", "<=", "=", "==", "===", ">",
          ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
          "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
          "||=", "~" /* handles =~ and !~ */,
          "break", "case", "continue", "delete",
          "do", "else", "finally", "instanceof",
          "return", "throw", "try", "typeof"
          ];
      // Each entry is one alternative of the final pattern.
      var alternatives = [
          '(?:(?:^|[^0-9.])\\.{1,3})',  // a dot that's not part of a number
          '(?:(?:^|[^\\+])\\+)',  // allow + but not ++
          '(?:(?:^|[^\\-])-)'  // allow - but not --
          ];
      for (var k = 0, n = preceders.length; k < n; ++k) {
        var preceder = preceders[k];
        if (/^[a-zA-Z]/.test(preceder)) {
          // Keywords must start at a word boundary.
          alternatives.push('\\b' + preceder);
        } else {
          // Punctuation: escape everything except = < > : &
          alternatives.push(preceder.replace(/([^=<>:&])/g, '\\$1'));
        }
      }
      alternatives.push('^');  // matches the empty string
      // Anchored at the end of the text, allowing trailing whitespace.
      return new RegExp('(?:' + alternatives.join('|') + ')\\s*$');
    }();
    248 
    // The regexps are defined once here so that the interpreter doesn't have to
    // create a new object each time one of the functions below is called.
    var pr_amp = /&/g;
    var pr_lt = /</g;
    var pr_gt = />/g;
    var pr_quot = /\"/g;

    /**
     * Like textToHtml but also escapes double quotes so that the result is safe
     * inside a double quoted attribute value.
     * @param {string} str plain text.
     * @return {string} html.
     */
    function attribToHtml(str) {
      var escaped = textToHtml(str);
      return escaped.replace(pr_quot, '&quot;');
    }

    /**
     * Escapes the html special characters &, < and > in plain text.
     * The ampersand is handled first so that the entities produced for < and >
     * are not escaped again.
     * @param {string} str plain text.
     * @return {string} html.
     */
    function textToHtml(str) {
      var escaped = str.replace(pr_amp, '&amp;');
      escaped = escaped.replace(pr_lt, '&lt;');
      escaped = escaped.replace(pr_gt, '&gt;');
      return escaped;
    }
    271 
    272 
    var pr_ltEnt = /&lt;/g;
    var pr_gtEnt = /&gt;/g;
    var pr_aposEnt = /&apos;/g;
    var pr_quotEnt = /&quot;/g;
    var pr_ampEnt = /&amp;/g;
    var pr_nbspEnt = /&nbsp;/g;
    /**
     * Unescapes html to plain text.
     *
     * Decodes decimal (&#65;) and hexadecimal (&#x41; or &#X41;) character
     * references, then the named entities lt, gt, apos, quot, nbsp and amp.
     * nbsp becomes a plain space.
     *
     * @param {string} html escaped text.
     * @return {string} the text with the entities above replaced.
     */
    function htmlToText(html) {
      var pos = html.indexOf('&');
      if (pos < 0) { return html; }
      // Handle numeric entities specially.  We can't use functional substitution
      // since that doesn't work in older versions of Safari.
      // These should be rare since most browsers convert them to normal chars.
      for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
        var end = html.indexOf(';', pos);
        // Without a terminator here there is none for any later reference.
        if (end < 0) { break; }
        // pos is at the '&' and pos + 1 at the '#', so the digits start at
        // pos + 2.
        var digits = html.substring(pos + 2, end);
        var radix = 10;
        var prefix = digits.charAt(0);
        if (prefix === 'x' || prefix === 'X') {
          digits = digits.substring(1);
          radix = 16;
        }
        // parseInt accepts trailing junk, so check the digits explicitly to
        // avoid swallowing text up to an unrelated semicolon.
        var wellFormed = radix === 16 ?
            /^[0-9a-fA-F]+$/.test(digits) : /^[0-9]+$/.test(digits);
        if (!wellFormed) { continue; }
        var codePoint = parseInt(digits, radix);
        if (codePoint > 0x10FFFF) { continue; }  // not a Unicode code point
        var decoded;
        if (codePoint > 0xFFFF) {
          // String.fromCharCode works on UTF-16 code units, so a supplementary
          // code point has to be emitted as a surrogate pair.
          var offset = codePoint - 0x10000;
          decoded = String.fromCharCode(
              0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        } else {
          decoded = String.fromCharCode(codePoint);
        }
        html = html.substring(0, pos) + decoded + html.substring(end + 1);
      }

      // &amp; must be decoded last: decoding it earlier would turn an escaped
      // entity such as &amp;nbsp; into &nbsp; and then unescape it a second
      // time.
      return html.replace(pr_ltEnt, '<')
          .replace(pr_gtEnt, '>')
          .replace(pr_aposEnt, "'")
          .replace(pr_quotEnt, '"')
          .replace(pr_nbspEnt, ' ')
          .replace(pr_ampEnt, '&');
    }
    310 
    /**
     * Is the given node's innerHTML normally unescaped?
     * @param {Node} node
     * @return {boolean} true only when the node's tagName is exactly 'XMP'.
     */
    function isRawContent(node) {
      var tag = node.tagName;
      return tag === 'XMP';
    }
    315 
    /**
     * Appends an html serialization of a DOM node to an output buffer.
     *
     * Elements are written with lower-case tag and attribute names and double
     * quoted attribute values; attributes whose "specified" flag is false are
     * left out.  Text and CDATA nodes are written escaped.  Other node types
     * (e.g. comments) produce no output.
     *
     * @param {Node} node the element, attribute, text or CDATA node to write.
     * @param {Array.<string>} out output buffer that receives chunks of HTML.
     */
    function normalizedHtml(node, out) {
      switch (node.nodeType) {
        case 1:  // an element
          var name = node.tagName.toLowerCase();
          out.push('<', name);
          for (var i = 0; i < node.attributes.length; ++i) {
            var attr = node.attributes[i];
            if (!attr.specified) { continue; }
            out.push(' ');
            normalizedHtml(attr, out);
          }
          out.push('>');
          for (var child = node.firstChild; child; child = child.nextSibling) {
            normalizedHtml(child, out);
          }
          // Void elements take no end tag in html, so only emit one for them
          // when the DOM nevertheless reports children.
          if (node.firstChild ||
              !/^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/
                  .test(name)) {
            out.push('<\/', name, '>');
          }
          break;
        case 2: // an attribute
          out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
          break;
        case 3: case 4: // text
          out.push(textToHtml(node.nodeValue));
          break;
      }
    }
    343 
    /**
     * Caches the result of the innerHTML probe made by getInnerHtml.
     * null until the first call, then true or false.
     */
    var PR_innerHtmlWorks = null;

    /**
     * Returns the content of a node as escaped html.
     *
     * innerHTML is hopelessly broken in Safari 2.0.4 when the content is an html
     * description of well formed XML and the containing tag is a PRE tag, so
     * that case is detected once, and innerHTML is emulated by walking the
     * node's children when it applies.
     *
     * @param {Node} node
     * @return {string} html
     */
    function getInnerHtml(node) {
      if (PR_innerHtmlWorks === null) {
        // Probe: text containing '<' must come back from innerHTML escaped.
        var probe = document.createElement('PRE');
        var probeText = document.createTextNode(
            '<!DOCTYPE foo PUBLIC "foo bar">\n<foo />');
        probe.appendChild(probeText);
        PR_innerHtmlWorks = !/</.test(probe.innerHTML);
      }

      if (!PR_innerHtmlWorks) {
        // Emulate innerHTML by serializing each child in turn.
        var pieces = [];
        var kid = node.firstChild;
        while (kid) {
          normalizedHtml(kid, pieces);
          kid = kid.nextSibling;
        }
        return pieces.join('');
      }

      var html = node.innerHTML;
      // XMP tags contain unescaped entities so require special handling.
      return isRawContent(node) ? textToHtml(html) : html;
    }
    371 
    /**
     * Creates a function that expands tabs to spaces.  The returned function can
     * be fed successive chunks of text; it remembers the current column between
     * calls so that tabs keep lining up across chunks.
     * @param {number} tabWidth the number of characters between tab columns.
     * @return {function (string) : string} a function that takes
     *   plain text and return the text with tabs expanded.
     * @private
     */
    function makeTabExpander(tabWidth) {
      var PADDING = '                ';  // 16 spaces, sliced as needed
      var column = 0;  // characters seen since the last newline

      return function (plainText) {
        // Created lazily so that text without tabs is returned untouched.
        var pieces = null;
        var copiedUpTo = 0;  // start of the text not yet copied into pieces
        var len = plainText.length;
        for (var idx = 0; idx < len; ++idx) {
          var c = plainText.charAt(idx);
          if (c === '\n') {
            column = 0;
          } else if (c !== '\t') {
            ++column;
          } else {
            if (!pieces) { pieces = []; }
            pieces.push(plainText.substring(copiedUpTo, idx));
            // The padding needed to reach the next column, where columns occur
            // at multiples of tabWidth.
            var pad = tabWidth - (column % tabWidth);
            column += pad;
            // PADDING may be shorter than pad, so emit it in slices.
            while (pad >= 0) {
              pieces.push(PADDING.substring(0, pad));
              pad -= PADDING.length;
            }
            copiedUpTo = idx + 1;
          }
        }
        if (!pieces) { return plainText; }
        pieces.push(plainText.substring(copiedUpTo));
        return pieces.join('');
      };
    }
    419 
    // The below pattern matches one of the following
    // (1) /[^<]+/ : A run of characters other than '<'
    // (2) /<!--.*?-->/: an HTML comment
    // (3) /<!\[CDATA\[.*?\]\]>/: a cdata section
    // (4) /<\/?[a-zA-Z][^>]*>/ : A probable tag that should not be highlighted
    // (5) /</ : A '<' that does not begin a larger chunk.  Treated as 1
    var pr_chunkPattern =
    /(?:[^<]+|<!--[\s\S]*?-->|<!\[CDATA\[([\s\S]*?)\]\]>|<\/?[a-zA-Z][^>]*>|<)/g;
    // Prefix tests used to classify a chunk matched by pr_chunkPattern.
    var pr_commentPrefix = /^<!--/;
    // A CDATA section starts with "<![CDATA[", including the '!'.
    var pr_cdataPrefix = /^<!\[CDATA\[/;
    var pr_brPrefix = /^<br\b/i;
    // Captures the optional '/' of an end tag and the tag name.
    var pr_tagNameRe = /^<(\/?)([a-zA-Z]+)/;
    432 
    /**
     * Separates markup into plain source text and the html tags embedded in it,
     * converting tags which are significant for tokenization (<br>) into their
     * textual equivalent.
     *
     * @param {string} s html where whitespace is considered significant.
     * @return {Object} an object with two members: "source", the source code as
     *     plain text, and "tags", a flat array of alternating positions in
     *     source and the html that was removed at that position.
     * @private
     */
    function extractTags(s) {
      // With the 'g' modifier, match returns the list of all chunks, which are
      // then classified one by one.
      var chunks = s.match(pr_chunkPattern);
      var textParts = [];
      var textLen = 0;  // total length of the strings in textParts
      var tags = [];
      var count = chunks ? chunks.length : 0;
      for (var i = 0; i < count; ++i) {
        var chunk = chunks[i];
        var isMarkup = chunk.length > 1 && chunk.charAt(0) === '<';
        if (!isMarkup) {
          // Ordinary escaped text, or a lone '<'.
          var text = htmlToText(chunk);
          textParts.push(text);
          textLen += text.length;
          continue;
        }
        // Comments are dropped entirely.
        if (pr_commentPrefix.test(chunk)) { continue; }
        if (pr_cdataPrefix.test(chunk)) {
          // strip CDATA prefix and suffix.  Don't unescape since it's CDATA
          textParts.push(chunk.substring(9, chunk.length - 3));
          textLen += chunk.length - 12;
          continue;
        }
        if (pr_brPrefix.test(chunk)) {
          // <br> tags are lexically significant so convert them to text.
          // This is undone later.
          textParts.push('\n');
          ++textLen;
          continue;
        }
        if (chunk.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(chunk)) {
          // A <span class="nocode"> will start a section that should be
          // ignored.  Walk forward until the matching end tag, keeping count of
          // nested tags with the same name.
          var tagName = chunk.match(pr_tagNameRe)[2];
          var depth = 1;
          var j = i + 1;
          for (; j < count; ++j) {
            var other = chunks[j].match(pr_tagNameRe);
            if (!other || other[2] !== tagName) { continue; }
            if (other[1] !== '/') {
              ++depth;
            } else if (--depth === 0) {
              break;
            }
          }
          if (j < count) {
            // Record the whole section as a single extracted tag.
            tags.push(textLen, chunks.slice(i, j + 1).join(''));
            i = j;
            continue;
          }
          // Unclosed sections are not special: fall through and record only
          // the start tag.
        }
        tags.push(textLen, chunk);
      }
      return { source: textParts.join(''), tags: tags };
    }
    501 
    /**
     * True if the given tag contains a class attribute with the nocode class.
     * @param {string} tag the html of a single tag.
     * @return {boolean}
     */
    function isNoCodeTag(tag) {
      // First canonicalize the representation of attributes so that every value
      // is double quoted.
      var canonical = tag.replace(
          /\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
          ' $1="$2$3$4"');
      // Then look for the attribute we want.
      return /[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/.test(canonical);
    }
    511 
    /** Given triples of [style, pattern, context] returns a lexing function,
      * The lexing function interprets the patterns to find token boundaries and
      * returns a decoration list of the form
      * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
      * where index_n is an index into the sourceCode, and style_n is a style
      * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
      * all characters in sourceCode[index_n-1:index_n].
      *
      * The stylePatterns is a list whose elements have the form
      * [style : string, pattern : RegExp, context : RegExp, shortcut : string].
      *
      * Style is a style constant like PR_PLAIN.
      *
      * Pattern must only match prefixes, and if it matches a prefix and context
      * is null or matches the last non-comment token parsed, then that match is
      * considered a token with the same style.
      *
      * Context is applied to the last non-whitespace, non-comment token
      * recognized.  It is not consulted for a pattern reached via its shortcut.
      *
      * Shortcut is an optional string of characters, any of which, if the first
      * character, guarantee that this pattern and only this pattern matches.
      * If the pattern nevertheless fails to match, or matches the empty string,
      * the fallthrough patterns are tried instead, and if none of those yields a
      * token, a single character is emitted as PR_PLAIN so that lexing always
      * makes progress.
      *
      * @param {Array} shortcutStylePatterns patterns that always start with
      *   a known character.  Must have a shortcut string.
      * @param {Array} fallthroughStylePatterns patterns that will be tried in
      *   order if the shortcut ones fail.  May have shortcuts.
      *
      * @return {function (string, number?) : Array.<number|string>} a
      *   function that takes source code and returns a list of decorations.
      */
    function createSimpleLexer(shortcutStylePatterns,
                               fallthroughStylePatterns) {
      // Maps a first character to the pattern registered for it.
      var shortcuts = {};
      (function () {
        var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
        // Walk backwards so that, for a shared shortcut character, the earliest
        // pattern in the list wins.
        for (var i = allPatterns.length; --i >= 0;) {
          var patternParts = allPatterns[i];
          var shortcutChars = patternParts[3];
          if (shortcutChars) {
            for (var c = shortcutChars.length; --c >= 0;) {
              shortcuts[shortcutChars.charAt(c)] = patternParts;
            }
          }
        }
      })();

      var nPatterns = fallthroughStylePatterns.length;
      var notWs = /\S/;

      return function (sourceCode, opt_basePos) {
        opt_basePos = opt_basePos || 0;
        var decorations = [opt_basePos, PR_PLAIN];
        var lastToken = '';
        var pos = 0;  // index into sourceCode
        var tail = sourceCode;

        while (tail.length) {
          var style;
          var token = null;
          var match;

          var patternParts = shortcuts[tail.charAt(0)];
          if (patternParts) {
            match = tail.match(patternParts[1]);
            // The shortcut is only a hint: e.g. an unterminated single-line
            // string followed by more lines starts with a quote but does not
            // match the string pattern.
            if (match) {
              token = match[0];
              style = patternParts[0];
            }
          }

          if (!token) {
            for (var i = 0; i < nPatterns; ++i) {
              patternParts = fallthroughStylePatterns[i];
              var contextPattern = patternParts[2];
              if (contextPattern && !contextPattern.test(lastToken)) {
                // rule can't be used
                continue;
              }
              match = tail.match(patternParts[1]);
              if (match) {
                token = match[0];
                style = patternParts[0];
                break;
              }
            }
          }

          if (!token) {  // make sure that we make progress
            style = PR_PLAIN;
            token = tail.substring(0, 1);
          }

          decorations.push(opt_basePos + pos, style);
          pos += token.length;
          tail = tail.substring(token.length);
          if (style !== PR_COMMENT && notWs.test(token)) { lastToken = token; }
        }
        return decorations;
      };
    }
    609 
    // Lexer for the top level constructs of markup.  The patterns are tried in
    // the order listed.
    var PR_MARKUP_LEXER = createSimpleLexer([], [
        [PR_PLAIN,       /^[^<]+/, null],
        [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/, null],
        [PR_COMMENT,     /^<!--[\s\S]*?(?:-->|$)/, null],
        [PR_SOURCE,      /^<\?[\s\S]*?(?:\?>|$)/, null],
        [PR_SOURCE,      /^<%[\s\S]*?(?:%>|$)/, null],
        [PR_SOURCE,
         // Tags whose content is not escaped, and which contain source code.
         /^<(script|style|xmp)\b[^>]*>[\s\S]*?<\/\1\b[^>]*>/i, null],
        [PR_TAG,         /^<\/?\w[^<>]*>/, null]
        ]);
    // Splits any of the script|style|xmp entries above into a start tag,
    // source content, and end tag.
    var PR_SOURCE_CHUNK_PARTS = /^(<[^>]*>)([\s\S]*)(<\/[^>]*>)$/;
    /** split markup on tags, comments, application directives, and other top
      * level constructs.  Tags are returned as a single token - attributes are
      * not yet broken out.
      *
      * A source chunk that consists of a start tag, a body and an end tag is
      * split into three decorations: PR_TAG, PR_SOURCE and PR_TAG.
      *
      * @param {string} source markup as plain text.
      * @return {Array.<number|string>} a decoration list of alternating
      *     positions and styles.
      * @private
      */
    function tokenizeMarkup(source) {
      var decorations = PR_MARKUP_LEXER(source);
      for (var i = 0; i < decorations.length; i += 2) {
        if (decorations[i + 1] === PR_SOURCE) {
          var start, end;
          start = decorations[i];
          end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
          // Split out start and end script tags as actual tags, and leave the
          // body with style PR_SOURCE.
          var sourceChunk = source.substring(start, end);
          var match = sourceChunk.match(PR_SOURCE_CHUNK_PARTS);
          if (match) {
            decorations.splice(
                i, 2,
                start, PR_TAG,  // the open chunk
                start + match[1].length, PR_SOURCE,
                start + match[1].length + (match[2] || '').length, PR_TAG);
            // Step over the body and end tag entries just inserted.  The body
            // is raw source; looking at it again would split content such as
            // <xmp><b>x</b></xmp> a second time.
            i += 4;
          }
        }
      }
      return decorations;
    }
    651 
    // Lexer for the inside of a single tag.  Quoted values and tag punctuation
    // are selected by their first character; the remaining patterns are tried
    // in order, using the previous token as context.
    var PR_TAG_LEXER = createSimpleLexer([
        [PR_ATTRIB_VALUE, /^\'[^\']*(?:\'|$)/, null, "'"],
        [PR_ATTRIB_VALUE, /^\"[^\"]*(?:\"|$)/, null, '"'],
        [PR_PUNCTUATION,  /^[<>\/=]+/, null, '<>/=']
        ], [
        [PR_TAG,          /^[\w:\-]+/, /^</],
        [PR_ATTRIB_VALUE, /^[\w\-]+/, /^=/],
        [PR_ATTRIB_NAME,  /^[\w:\-]+/, null],
        [PR_PLAIN,        /^\s+/, null, ' \t\r\n']
        ]);
    /**
     * Replaces every PR_TAG decoration with the finer grained decorations that
     * PR_TAG_LEXER produces for that stretch of the source.
     *
     * @param {string} source the text the decorations refer to.
     * @param {Array.<number|string>} decorations a decoration list of
     *     alternating positions and styles.  Modified in place.
     * @return {Array.<number|string>} the same decorations array.
     * @private
     */
    function splitTagAttributes(source, decorations) {
      var i = 0;
      while (i < decorations.length) {
        if (decorations[i + 1] !== PR_TAG) {
          i += 2;
          continue;
        }
        // The tag runs up to the start of the next decoration, or to the end of
        // the source for the last one.
        var tagStart = decorations[i];
        var tagEnd = source.length;
        if (i + 2 < decorations.length) { tagEnd = decorations[i + 2]; }
        var tagDecorations =
            PR_TAG_LEXER(source.substring(tagStart, tagEnd), tagStart);
        spliceArrayInto(tagDecorations, decorations, i, 2);
        // Continue after the entries just inserted so they are not revisited.
        i += tagDecorations.length;
      }
      return decorations;
    }
    681 
    682   /** returns a function that produces a list of decorations from source text.
    683     *
    684     * This code treats ", ', and ` as string delimiters, and \ as a string
    685     * escape.  It does not recognize perl's qq() style strings.
    686     * It has no special handling for double delimiter escapes as in basic, or
    687     * the tripled delimiters used in python, but should work on those regardless
    688     * although in those cases a single string literal may be broken up into
    689     * multiple adjacent string literals.
    690     *
    691     * It recognizes C, C++, and shell style comments.
    692     *
    693     * @param {Object} options a set of optional parameters.
    694     * @return {function (string) : Array.<string|number>} a
    695     *     decorator that takes sourceCode as plain text and that returns a
    696     *     decoration list
    697     */
    698   function sourceDecorator(options) {
    699     var shortcutStylePatterns = [], fallthroughStylePatterns = [];
    700     if (options.tripleQuotedStrings) {
    701       // '''multi-line-string''', 'single-line-string', and double-quoted
    702       shortcutStylePatterns.push(
    703           [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
    704            null, '\'"']);
    705     } else if (options.multiLineStrings) {
    706       // 'multi-line-string', "multi-line-string"
    707       shortcutStylePatterns.push(
    708           [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
    709            null, '\'"`']);
    710     } else {
    711       // 'single-line-string', "single-line-string"
    712       shortcutStylePatterns.push(
    713           [PR_STRING,
    714            /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
    715            null, '"\'']);
    716     }
    717     fallthroughStylePatterns.push(
    718         [PR_PLAIN,   /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n']);
    719     if (options.hashComments) {
    720       shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    721     }
    722     if (options.cStyleComments) {
    723       fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    724       fallthroughStylePatterns.push(
    725           [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
    726     }
    727     if (options.regexLiterals) {
    728       var REGEX_LITERAL = (
    729           // A regular expression literal starts with a slash that is
    730           // not followed by * or / so that it is not confused with
    731           // comments.
    732           '^/(?=[^/*])'
    733           // and then contains any number of raw characters,
    734           + '(?:[^/\\x5B\\x5C]'
    735           // escape sequences (\x5C),
    736           +    '|\\x5C[\\s\\S]'
    737           // or non-nesting character sets (\x5B\x5D);
    738           +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
    739           // finally closed by a /.
    740           + '(?:/|$)');
    741       fallthroughStylePatterns.push(
    742           [PR_STRING, new RegExp(REGEX_LITERAL), REGEXP_PRECEDER_PATTERN]);
    743     }
    744 
    745     var keywords = wordSet(options.keywords);
    746 
    747     options = null;
    748 
    749     /** splits the given string into comment, string, and "other" tokens.
    750       * @param {string} sourceCode as plain text
    751       * @return {Array.<number|string>} a decoration list.
    752       * @private
    753       */
    754     var splitStringAndCommentTokens = createSimpleLexer(
    755         shortcutStylePatterns, fallthroughStylePatterns);
    756 
    757     var styleLiteralIdentifierPuncRecognizer = createSimpleLexer([], [
    758         [PR_PLAIN,       /^\s+/, null, ' \r\n'],
    759         // TODO(mikesamuel): recognize non-latin letters and numerals in idents
    760         [PR_PLAIN,       /^[a-z_$@][a-z_$@0-9]*/i, null],
    761         // A hex number
    762         [PR_LITERAL,     /^0x[a-f0-9]+[a-z]/i, null],
    763         // An octal or decimal number, possibly in scientific notation
    764         [PR_LITERAL,
    765          /^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i,
    766          null, '123456789'],
    767         [PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]
    768         // Fallback will handle decimal points not adjacent to a digit
    769       ]);
    770 
    771     /** splits plain text tokens into more specific tokens, and then tries to
    772       * recognize keywords, and types.
    773       * @private
    774       */
    775     function splitNonStringNonCommentTokens(source, decorations) {
    776       for (var i = 0; i < decorations.length; i += 2) {
    777         var style = decorations[i + 1];
    778         if (style === PR_PLAIN) {
    779           var start, end, chunk, subDecs;
    780           start = decorations[i];
    781           end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
    782           chunk = source.substring(start, end);
    783           subDecs = styleLiteralIdentifierPuncRecognizer(chunk, start);
    784           for (var j = 0, m = subDecs.length; j < m; j += 2) {
    785             var subStyle = subDecs[j + 1];
    786             if (subStyle === PR_PLAIN) {
    787               var subStart = subDecs[j];
    788               var subEnd = j + 2 < m ? subDecs[j + 2] : chunk.length;
    789               var token = source.substring(subStart, subEnd);
    790               if (token === '.') {
    791                 subDecs[j + 1] = PR_PUNCTUATION;
    792               } else if (token in keywords) {
    793                 subDecs[j + 1] = PR_KEYWORD;
    794               } else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
    795                 // classify types and annotations using Java's style conventions
    796                 subDecs[j + 1] = token.charAt(0) === '@' ? PR_LITERAL : PR_TYPE;
    797               }
    798             }
    799           }
    800           spliceArrayInto(subDecs, decorations, i, 2);
    801           i += subDecs.length - 2;
    802         }
    803       }
    804       return decorations;
    805     }
    806 
    807     return function (sourceCode) {
    808       // Split into strings, comments, and other.
    809       // We do this because strings and comments are easily recognizable and can
    810       // contain stuff that looks like other tokens, so we want to mark those
    811       // early so we don't recurse into them.
    812       var decorations = splitStringAndCommentTokens(sourceCode);
    813 
    814       // Split non comment|string tokens on whitespace and word boundaries
    815       decorations = splitNonStringNonCommentTokens(sourceCode, decorations);
    816 
    817       return decorations;
    818     };
    819   }
    820 
    821   var decorateSource = sourceDecorator({
    822         keywords: ALL_KEYWORDS,
    823         hashComments: true,
    824         cStyleComments: true,
    825         multiLineStrings: true,
    826         regexLiterals: true
    827       });
    828 
    829   /** identify regions of markup that are really source code, and recursivley
    830     * lex them.
    831     * @private
    832     */
    833   function splitSourceNodes(source, decorations) {
    834     for (var i = 0; i < decorations.length; i += 2) {
    835       var style = decorations[i + 1];
    836       if (style === PR_SOURCE) {
    837         // Recurse using the non-markup lexer
    838         var start, end;
    839         start = decorations[i];
    840         end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
    841         var subDecorations = decorateSource(source.substring(start, end));
    842         for (var j = 0, m = subDecorations.length; j < m; j += 2) {
    843           subDecorations[j] += start;
    844         }
    845         spliceArrayInto(subDecorations, decorations, i, 2);
    846         i += subDecorations.length - 2;
    847       }
    848     }
    849     return decorations;
    850   }
    851 
    852   /** identify attribute values that really contain source code and recursively
    853     * lex them.
    854     * @private
    855     */
    856   function splitSourceAttributes(source, decorations) {
    857     var nextValueIsSource = false;
    858     for (var i = 0; i < decorations.length; i += 2) {
    859       var style = decorations[i + 1];
    860       var start, end;
    861       if (style === PR_ATTRIB_NAME) {
    862         start = decorations[i];
    863         end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
    864         nextValueIsSource = /^on|^style$/i.test(source.substring(start, end));
    865       } else if (style === PR_ATTRIB_VALUE) {
    866         if (nextValueIsSource) {
    867           start = decorations[i];
    868           end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
    869           var attribValue = source.substring(start, end);
    870           var attribLen = attribValue.length;
    871           var quoted =
    872               (attribLen >= 2 && /^[\"\']/.test(attribValue) &&
    873                attribValue.charAt(0) === attribValue.charAt(attribLen - 1));
    874 
    875           var attribSource;
    876           var attribSourceStart;
    877           var attribSourceEnd;
    878           if (quoted) {
    879             attribSourceStart = start + 1;
    880             attribSourceEnd = end - 1;
    881             attribSource = attribValue;
    882           } else {
    883             attribSourceStart = start + 1;
    884             attribSourceEnd = end - 1;
    885             attribSource = attribValue.substring(1, attribValue.length - 1);
    886           }
    887 
    888           var attribSourceDecorations = decorateSource(attribSource);
    889           for (var j = 0, m = attribSourceDecorations.length; j < m; j += 2) {
    890             attribSourceDecorations[j] += attribSourceStart;
    891           }
    892 
    893           if (quoted) {
    894             attribSourceDecorations.push(attribSourceEnd, PR_ATTRIB_VALUE);
    895             spliceArrayInto(attribSourceDecorations, decorations, i + 2, 0);
    896           } else {
    897             spliceArrayInto(attribSourceDecorations, decorations, i, 2);
    898           }
    899         }
    900         nextValueIsSource = false;
    901       }
    902     }
    903     return decorations;
    904   }
    905 
    906   /** returns a decoration list given a string of markup.
    907     *
    908     * This code recognizes a number of constructs.
    909     * <!-- ... --> comment
    910     * <!\w ... >   declaration
    911     * <\w ... >    tag
    912     * </\w ... >   tag
    913     * <?...?>      embedded source
    914     * <%...%>      embedded source
    915     * &[#\w]...;   entity
    916     *
    917     * It does not recognizes %foo; doctype entities from  .
    918     *
    919     * It will recurse into any <style>, <script>, and on* attributes using
    920     * PR_lexSource.
    921     */
    922   function decorateMarkup(sourceCode) {
    923     // This function works as follows:
    924     // 1) Start by splitting the markup into text and tag chunks
    925     //    Input:  string s
    926     //    Output: List<PR_Token> where style in (PR_PLAIN, null)
    927     // 2) Then split the text chunks further into comments, declarations,
    928     //    tags, etc.
    929     //    After each split, consider whether the token is the start of an
    930     //    embedded source section, i.e. is an open <script> tag.  If it is, find
    931     //    the corresponding close token, and don't bother to lex in between.
    932     //    Input:  List<string>
    933     //    Output: List<PR_Token> with style in
    934     //            (PR_TAG, PR_PLAIN, PR_SOURCE, null)
    935     // 3) Finally go over each tag token and split out attribute names and
    936     //    values.
    937     //    Input:  List<PR_Token>
    938     //    Output: List<PR_Token> where style in
    939     //            (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null)
    940     var decorations = tokenizeMarkup(sourceCode);
    941     decorations = splitTagAttributes(sourceCode, decorations);
    942     decorations = splitSourceNodes(sourceCode, decorations);
    943     decorations = splitSourceAttributes(sourceCode, decorations);
    944     return decorations;
    945   }
    946 
    947   /**
    948     * @param {string} sourceText plain text
    949     * @param {Array.<number|string>} extractedTags chunks of raw html preceded
    950     *   by their position in sourceText in order.
    951     * @param {Array.<number|string>} decorations style classes preceded by their
    952     *   position in sourceText in order.
    953     * @return {string} html
    954     * @private
    955     */
    956   function recombineTagsAndDecorations(sourceText, extractedTags, decorations) {
    957     var html = [];
    958     // index past the last char in sourceText written to html
    959     var outputIdx = 0;
    960 
    961     var openDecoration = null;
    962     var currentDecoration = null;
    963     var tagPos = 0;  // index into extractedTags
    964     var decPos = 0;  // index into decorations
    965     var tabExpander = makeTabExpander(PR_TAB_WIDTH);
    966 
    967     var adjacentSpaceRe = /([\r\n ]) /g;
    968     var startOrSpaceRe = /(^| ) /gm;
    969     var newlineRe = /\r\n?|\n/g;
    970     var trailingSpaceRe = /[ \r\n]$/;
    971     var lastWasSpace = true;  // the last text chunk emitted ended with a space.
    972 
    973     // A helper function that is responsible for opening sections of decoration
    974     // and outputing properly escaped chunks of source
    975     function emitTextUpTo(sourceIdx) {
    976       if (sourceIdx > outputIdx) {
    977         if (openDecoration && openDecoration !== currentDecoration) {
    978           // Close the current decoration
    979           html.push('</span>');
    980           openDecoration = null;
    981         }
    982         if (!openDecoration && currentDecoration) {
    983           openDecoration = currentDecoration;
    984           html.push('<span class="', openDecoration, '">');
    985         }
    986         // This interacts badly with some wikis which introduces paragraph tags
    987         // into pre blocks for some strange reason.
    988         // It's necessary for IE though which seems to lose the preformattedness
    989         // of <pre> tags when their innerHTML is assigned.
    990         // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
    991         // and it serves to undo the conversion of <br>s to newlines done in
    992         // chunkify.
    993         var htmlChunk = textToHtml(
    994             tabExpander(sourceText.substring(outputIdx, sourceIdx)))
    995             .replace(lastWasSpace
    996                      ? startOrSpaceRe
    997                      : adjacentSpaceRe, '$1&nbsp;');
    998         // Keep track of whether we need to escape space at the beginning of the
    999         // next chunk.
   1000         lastWasSpace = trailingSpaceRe.test(htmlChunk);
   1001         html.push(htmlChunk.replace(newlineRe, '<br />'));
   1002         outputIdx = sourceIdx;
   1003       }
   1004     }
   1005 
   1006     while (true) {
   1007       // Determine if we're going to consume a tag this time around.  Otherwise
   1008       // we consume a decoration or exit.
   1009       var outputTag;
   1010       if (tagPos < extractedTags.length) {
   1011         if (decPos < decorations.length) {
   1012           // Pick one giving preference to extractedTags since we shouldn't open
   1013           // a new style that we're going to have to immediately close in order
   1014           // to output a tag.
   1015           outputTag = extractedTags[tagPos] <= decorations[decPos];
   1016         } else {
   1017           outputTag = true;
   1018         }
   1019       } else {
   1020         outputTag = false;
   1021       }
   1022       // Consume either a decoration or a tag or exit.
   1023       if (outputTag) {
   1024         emitTextUpTo(extractedTags[tagPos]);
   1025         if (openDecoration) {
   1026           // Close the current decoration
   1027           html.push('</span>');
   1028           openDecoration = null;
   1029         }
   1030         html.push(extractedTags[tagPos + 1]);
   1031         tagPos += 2;
   1032       } else if (decPos < decorations.length) {
   1033         emitTextUpTo(decorations[decPos]);
   1034         currentDecoration = decorations[decPos + 1];
   1035         decPos += 2;
   1036       } else {
   1037         break;
   1038       }
   1039     }
   1040     emitTextUpTo(sourceText.length);
   1041     if (openDecoration) {
   1042       html.push('</span>');
   1043     }
   1044 
   1045     return html.join('');
   1046   }
   1047 
   1048   /** Maps language-specific file extensions to handlers. */
   1049   var langHandlerRegistry = {};
   1050   /** Register a language handler for the given file extensions.
   1051     * @param {function (string) : Array.<number|string>} handler
   1052     *     a function from source code to a list of decorations.
   1053     * @param {Array.<string>} fileExtensions
   1054     */
   1055   function registerLangHandler(handler, fileExtensions) {
   1056     for (var i = fileExtensions.length; --i >= 0;) {
   1057       var ext = fileExtensions[i];
   1058       if (!langHandlerRegistry.hasOwnProperty(ext)) {
   1059         langHandlerRegistry[ext] = handler;
   1060       } else if ('console' in window) {
   1061         console.log('cannot override language handler %s', ext);
   1062       }
   1063     }
   1064   }
   1065   registerLangHandler(decorateSource, ['default-code']);
   1066   registerLangHandler(decorateMarkup,
   1067                       ['default-markup', 'html', 'htm', 'xhtml', 'xml', 'xsl']);
   1068   registerLangHandler(sourceDecorator({
   1069           keywords: CPP_KEYWORDS,
   1070           hashComments: true,
   1071           cStyleComments: true
   1072         }), ['c', 'cc', 'cpp', 'cxx', 'cyc']);
   1073   registerLangHandler(sourceDecorator({
   1074           keywords: CSHARP_KEYWORDS,
   1075           hashComments: true,
   1076           cStyleComments: true
   1077         }), ['cs']);
   1078   registerLangHandler(sourceDecorator({
   1079           keywords: JAVA_KEYWORDS,
   1080           cStyleComments: true
   1081         }), ['java']);
   1082   registerLangHandler(sourceDecorator({
   1083           keywords: SH_KEYWORDS,
   1084           hashComments: true,
   1085           multiLineStrings: true
   1086         }), ['bsh', 'csh', 'sh']);
   1087   registerLangHandler(sourceDecorator({
   1088           keywords: PYTHON_KEYWORDS,
   1089           hashComments: true,
   1090           multiLineStrings: true,
   1091           tripleQuotedStrings: true
   1092         }), ['cv', 'py']);
   1093   registerLangHandler(sourceDecorator({
   1094           keywords: PERL_KEYWORDS,
   1095           hashComments: true,
   1096           multiLineStrings: true,
   1097           regexLiterals: true
   1098         }), ['perl', 'pl', 'pm']);
   1099   registerLangHandler(sourceDecorator({
   1100           keywords: RUBY_KEYWORDS,
   1101           hashComments: true,
   1102           multiLineStrings: true,
   1103           regexLiterals: true
   1104         }), ['rb']);
   1105   registerLangHandler(sourceDecorator({
   1106           keywords: JSCRIPT_KEYWORDS,
   1107           cStyleComments: true,
   1108           regexLiterals: true
   1109         }), ['js']);
   1110 
   1111   function prettyPrintOne(sourceCodeHtml, opt_langExtension) {
   1112     try {
   1113       // Extract tags, and convert the source code to plain text.
   1114       var sourceAndExtractedTags = extractTags(sourceCodeHtml);
   1115       /** Plain text. @type {string} */
   1116       var source = sourceAndExtractedTags.source;
   1117 
   1118       /** Even entries are positions in source in ascending order.  Odd entries
   1119         * are tags that were extracted at that position.
   1120         * @type {Array.<number|string>}
   1121         */
   1122       var extractedTags = sourceAndExtractedTags.tags;
   1123 
   1124       // Pick a lexer and apply it.
   1125       if (!langHandlerRegistry.hasOwnProperty(opt_langExtension)) {
   1126         // Treat it as markup if the first non whitespace character is a < and
   1127         // the last non-whitespace character is a >.
   1128         opt_langExtension =
   1129             /^\s*</.test(source) ? 'default-markup' : 'default-code';
   1130       }
   1131 
   1132       /** Even entries are positions in source in ascending order.  Odd enties
   1133         * are style markers (e.g., PR_COMMENT) that run from that position until
   1134         * the end.
   1135         * @type {Array.<number|string>}
   1136         */
   1137       var decorations = langHandlerRegistry[opt_langExtension].call({}, source);
   1138 
   1139       // Integrate the decorations and tags back into the source code to produce
   1140       // a decorated html string.
   1141       return recombineTagsAndDecorations(source, extractedTags, decorations);
   1142     } catch (e) {
   1143       if ('console' in window) {
   1144         console.log(e);
   1145         console.trace();
   1146       }
   1147       return sourceCodeHtml;
   1148     }
   1149   }
   1150 
   1151   function prettyPrint(opt_whenDone) {
   1152     var isIE6 = _pr_isIE6();
   1153 
   1154     // fetch a list of nodes to rewrite
   1155     var codeSegments = [
   1156         document.getElementsByTagName('pre'),
   1157         document.getElementsByTagName('code'),
   1158         document.getElementsByTagName('xmp') ];
   1159     var elements = [];
   1160     for (var i = 0; i < codeSegments.length; ++i) {
   1161       for (var j = 0; j < codeSegments[i].length; ++j) {
   1162         elements.push(codeSegments[i][j]);
   1163       }
   1164     }
   1165     codeSegments = null;
   1166 
   1167     // the loop is broken into a series of continuations to make sure that we
   1168     // don't make the browser unresponsive when rewriting a large page.
   1169     var k = 0;
   1170 
   1171     function doWork() {
   1172       var endTime = (PR_SHOULD_USE_CONTINUATION ?
   1173                      new Date().getTime() + 250 /* ms */ :
   1174                      Infinity);
   1175       for (; k < elements.length && new Date().getTime() < endTime; k++) {
   1176         var cs = elements[k];
   1177         if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
   1178           // If the classes includes a language extensions, use it.
   1179           // Language extensions can be specified like
   1180           //     <pre class="prettyprint lang-cpp">
   1181           // the language extension "cpp" is used to find a language handler as
   1182           // passed to PR_registerLangHandler.
   1183           var langExtension = cs.className.match(/\blang-(\w+)\b/);
   1184           if (langExtension) { langExtension = langExtension[1]; }
   1185 
   1186           // make sure this is not nested in an already prettified element
   1187           var nested = false;
   1188           for (var p = cs.parentNode; p; p = p.parentNode) {
   1189             if ((p.tagName === 'pre' || p.tagName === 'code' ||
   1190                  p.tagName === 'xmp') &&
   1191                 p.className && p.className.indexOf('prettyprint') >= 0) {
   1192               nested = true;
   1193               break;
   1194             }
   1195           }
   1196           if (!nested) {
   1197             // fetch the content as a snippet of properly escaped HTML.
   1198             // Firefox adds newlines at the end.
   1199             var content = getInnerHtml(cs);
   1200             content = content.replace(/(?:\r\n?|\n)$/, '');
   1201 
   1202             // do the pretty printing
   1203             var newContent = prettyPrintOne(content, langExtension);
   1204 
   1205             // push the prettified html back into the tag.
   1206             if (!isRawContent(cs)) {
   1207               // just replace the old html with the new
   1208               cs.innerHTML = newContent;
   1209             } else {
   1210               // we need to change the tag to a <pre> since <xmp>s do not allow
   1211               // embedded tags such as the span tags used to attach styles to
   1212               // sections of source code.
   1213               var pre = document.createElement('PRE');
   1214               for (var i = 0; i < cs.attributes.length; ++i) {
   1215                 var a = cs.attributes[i];
   1216                 if (a.specified) {
   1217                   var aname = a.name.toLowerCase();
   1218                   if (aname === 'class') {
   1219                     pre.className = a.value;  // For IE 6
   1220                   } else {
   1221                     pre.setAttribute(a.name, a.value);
   1222                   }
   1223                 }
   1224               }
   1225               pre.innerHTML = newContent;
   1226 
   1227               // remove the old
   1228               cs.parentNode.replaceChild(pre, cs);
   1229               cs = pre;
   1230             }
   1231 
   1232             // Replace <br>s with line-feeds so that copying and pasting works
   1233             // on IE 6.
   1234             // Doing this on other browsers breaks lots of stuff since \r\n is
   1235             // treated as two newlines on Firefox, and doing this also slows
   1236             // down rendering.
   1237             if (isIE6 && cs.tagName === 'PRE') {
   1238               var lineBreaks = cs.getElementsByTagName('br');
   1239               for (var j = lineBreaks.length; --j >= 0;) {
   1240                 var lineBreak = lineBreaks[j];
   1241                 lineBreak.parentNode.replaceChild(
   1242                     document.createTextNode('\r\n'), lineBreak);
   1243               }
   1244             }
   1245           }
   1246         }
   1247       }
   1248       if (k < elements.length) {
   1249         // finish up in a continuation
   1250         setTimeout(doWork, 250);
   1251       } else if (opt_whenDone) {
   1252         opt_whenDone();
   1253       }
   1254     }
   1255 
   1256     doWork();
   1257   }
   1258 
   1259   window['PR_normalizedHtml'] = normalizedHtml;
   1260   window['prettyPrintOne'] = prettyPrintOne;
   1261   window['prettyPrint'] = prettyPrint;
   1262   window['PR'] = {
   1263         'createSimpleLexer': createSimpleLexer,
   1264         'registerLangHandler': registerLangHandler,
   1265         'sourceDecorator': sourceDecorator,
   1266         'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
   1267         'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
   1268         'PR_COMMENT': PR_COMMENT,
   1269         'PR_DECLARATION': PR_DECLARATION,
   1270         'PR_KEYWORD': PR_KEYWORD,
   1271         'PR_LITERAL': PR_LITERAL,
   1272         'PR_NOCODE': PR_NOCODE,
   1273         'PR_PLAIN': PR_PLAIN,
   1274         'PR_PUNCTUATION': PR_PUNCTUATION,
   1275         'PR_SOURCE': PR_SOURCE,
   1276         'PR_STRING': PR_STRING,
   1277         'PR_TAG': PR_TAG,
   1278         'PR_TYPE': PR_TYPE
   1279       };
   1280 })();
   1281