Home | History | Annotate | Download | only in script_formatter_worker
      1 /*
      2  * Copyright (C) 2011 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 FormatterWorker = {
     31     /**
     32      * @param {string} mimeType
     33      * @return {function(string, function(string, ?string, number, number))}
     34      */
     35     createTokenizer: function(mimeType)
     36     {
     37         var mode = CodeMirror.getMode({indentUnit: 2}, mimeType);
     38         var state = CodeMirror.startState(mode);
     39         function tokenize(line, callback)
     40         {
     41             var stream = new CodeMirror.StringStream(line);
     42             while (!stream.eol()) {
     43                 var style = mode.token(stream, state);
     44                 var value = stream.current();
     45                 callback(value, style, stream.start, stream.start + value.length);
     46                 stream.start = stream.pos;
     47             }
     48         }
     49         return tokenize;
     50     }
     51 };
     52 
     53 /**
     54  * @typedef {{indentString: string, content: string, mimeType: string}}
     55  */
     56 var FormatterParameters;
     57 
     58 var onmessage = function(event) {
     59     var data = /** @type !{method: string, params: !FormatterParameters} */ (event.data);
     60     if (!data.method)
     61         return;
     62 
     63     FormatterWorker[data.method](data.params);
     64 };
     65 
     66 /**
     67  * @param {!FormatterParameters} params
     68  */
     69 FormatterWorker.format = function(params)
     70 {
     71     // Default to a 4-space indent.
     72     var indentString = params.indentString || "    ";
     73     var result = {};
     74 
     75     if (params.mimeType === "text/html") {
     76         var formatter = new FormatterWorker.HTMLFormatter(indentString);
     77         result = formatter.format(params.content);
     78     } else if (params.mimeType === "text/css") {
     79         result.mapping = { original: [0], formatted: [0] };
     80         result.content = FormatterWorker._formatCSS(params.content, result.mapping, 0, 0, indentString);
     81     } else {
     82         result.mapping = { original: [0], formatted: [0] };
     83         result.content = FormatterWorker._formatScript(params.content, result.mapping, 0, 0, indentString);
     84     }
     85     postMessage(result);
     86 }
     87 
     88 /**
     89  * @param {number} totalLength
     90  * @param {number} chunkSize
     91  */
     92 FormatterWorker._chunkCount = function(totalLength, chunkSize)
     93 {
     94     if (totalLength <= chunkSize)
     95         return 1;
     96 
     97     var remainder = totalLength % chunkSize;
     98     var partialLength = totalLength - remainder;
     99     return (partialLength / chunkSize) + (remainder ? 1 : 0);
    100 }
    101 
    102 /**
    103  * @param {!Object} params
    104  */
    105 FormatterWorker.javaScriptOutline = function(params)
    106 {
    107     var chunkSize = 100000; // characters per data chunk
    108     var totalLength = params.content.length;
    109     var lines = params.content.split("\n");
    110     var chunkCount = FormatterWorker._chunkCount(totalLength, chunkSize);
    111     var outlineChunk = [];
    112     var previousIdentifier = null;
    113     var previousToken = null;
    114     var previousTokenType = null;
    115     var currentChunk = 1;
    116     var processedChunkCharacters = 0;
    117     var addedFunction = false;
    118     var isReadingArguments = false;
    119     var argumentsText = "";
    120     var currentFunction = null;
    121     var tokenizer = FormatterWorker.createTokenizer("text/javascript");
    122     for (var i = 0; i < lines.length; ++i) {
    123         var line = lines[i];
    124         tokenizer(line, processToken);
    125     }
    126 
    127     /**
    128      * @param {?string} tokenType
    129      * @return {boolean}
    130      */
    131     function isJavaScriptIdentifier(tokenType)
    132     {
    133         if (!tokenType)
    134             return false;
    135         return tokenType.startsWith("variable") || tokenType.startsWith("property") || tokenType === "def";
    136     }
    137 
    138     /**
    139      * @param {string} tokenValue
    140      * @param {?string} tokenType
    141      * @param {number} column
    142      * @param {number} newColumn
    143      */
    144     function processToken(tokenValue, tokenType, column, newColumn)
    145     {
    146         if (tokenType === "property" && previousTokenType === "property" && (previousToken === "get" || previousToken === "set")) {
    147             currentFunction = { line: i, column: column, name: previousToken + " " + tokenValue };
    148             addedFunction = true;
    149             previousIdentifier = null;
    150         } else if (isJavaScriptIdentifier(tokenType)) {
    151             previousIdentifier = tokenValue;
    152             if (tokenValue && previousToken === "function") {
    153                 // A named function: "function f...".
    154                 currentFunction = { line: i, column: column, name: tokenValue };
    155                 addedFunction = true;
    156                 previousIdentifier = null;
    157             }
    158         } else if (tokenType === "keyword") {
    159             if (tokenValue === "function") {
    160                 if (previousIdentifier && (previousToken === "=" || previousToken === ":")) {
    161                     // Anonymous function assigned to an identifier: "...f = function..."
    162                     // or "funcName: function...".
    163                     currentFunction = { line: i, column: column, name: previousIdentifier };
    164                     addedFunction = true;
    165                     previousIdentifier = null;
    166                 }
    167             }
    168         } else if (tokenValue === "." && isJavaScriptIdentifier(previousTokenType))
    169             previousIdentifier += ".";
    170         else if (tokenValue === "(" && addedFunction)
    171             isReadingArguments = true;
    172         if (isReadingArguments && tokenValue)
    173             argumentsText += tokenValue;
    174 
    175         if (tokenValue === ")" && isReadingArguments) {
    176             addedFunction = false;
    177             isReadingArguments = false;
    178             currentFunction.arguments = argumentsText.replace(/,[\r\n\s]*/g, ", ").replace(/([^,])[\r\n\s]+/g, "$1");
    179             argumentsText = "";
    180             outlineChunk.push(currentFunction);
    181         }
    182 
    183         if (tokenValue.trim().length) {
    184             // Skip whitespace tokens.
    185             previousToken = tokenValue;
    186             previousTokenType = tokenType;
    187         }
    188         processedChunkCharacters += newColumn - column;
    189 
    190         if (processedChunkCharacters >= chunkSize) {
    191             postMessage({ chunk: outlineChunk, total: chunkCount, index: currentChunk++ });
    192             outlineChunk = [];
    193             processedChunkCharacters = 0;
    194         }
    195     }
    196 
    197     postMessage({ chunk: outlineChunk, total: chunkCount, index: chunkCount });
    198 }
    199 
    200 FormatterWorker.CSSParserStates = {
    201     Initial: "Initial",
    202     Selector: "Selector",
    203     Style: "Style",
    204     PropertyName: "PropertyName",
    205     PropertyValue: "PropertyValue",
    206     AtRule: "AtRule",
    207 };
    208 
    209 FormatterWorker.parseCSS = function(params)
    210 {
    211     var chunkSize = 100000; // characters per data chunk
    212     var lines = params.content.split("\n");
    213     var rules = [];
    214     var processedChunkCharacters = 0;
    215 
    216     var state = FormatterWorker.CSSParserStates.Initial;
    217     var rule;
    218     var property;
    219     var UndefTokenType = {};
    220 
    221     /**
    222      * @param {string} tokenValue
    223      * @param {?string} tokenTypes
    224      * @param {number} column
    225      * @param {number} newColumn
    226      */
    227     function processToken(tokenValue, tokenTypes, column, newColumn)
    228     {
    229         var tokenType = tokenTypes ? tokenTypes.split(" ").keySet() : UndefTokenType;
    230         switch (state) {
    231         case FormatterWorker.CSSParserStates.Initial:
    232             if (tokenType["qualifier"] || tokenType["builtin"] || tokenType["tag"]) {
    233                 rule = {
    234                     selectorText: tokenValue,
    235                     lineNumber: lineNumber,
    236                     columNumber: column,
    237                     properties: [],
    238                 };
    239                 state = FormatterWorker.CSSParserStates.Selector;
    240             } else if (tokenType["def"]) {
    241                 rule = {
    242                     atRule: tokenValue,
    243                     lineNumber: lineNumber,
    244                     columNumber: column,
    245                 };
    246                 state = FormatterWorker.CSSParserStates.AtRule;
    247             }
    248             break;
    249         case FormatterWorker.CSSParserStates.Selector:
    250             if (tokenValue === "{" && tokenType === UndefTokenType) {
    251                 rule.selectorText = rule.selectorText.trim();
    252                 state = FormatterWorker.CSSParserStates.Style;
    253             } else {
    254                 rule.selectorText += tokenValue;
    255             }
    256             break;
    257         case FormatterWorker.CSSParserStates.AtRule:
    258             if ((tokenValue === ";" || tokenValue === "{") && tokenType === UndefTokenType) {
    259                 rule.atRule = rule.atRule.trim();
    260                 rules.push(rule);
    261                 state = FormatterWorker.CSSParserStates.Initial;
    262             } else {
    263                 rule.atRule += tokenValue;
    264             }
    265             break;
    266         case FormatterWorker.CSSParserStates.Style:
    267             if (tokenType["meta"] || tokenType["property"]) {
    268                 property = {
    269                     name: tokenValue,
    270                     value: "",
    271                 };
    272                 state = FormatterWorker.CSSParserStates.PropertyName;
    273             } else if (tokenValue === "}" && tokenType === UndefTokenType) {
    274                 rules.push(rule);
    275                 state = FormatterWorker.CSSParserStates.Initial;
    276             }
    277             break;
    278         case FormatterWorker.CSSParserStates.PropertyName:
    279             if (tokenValue === ":" && tokenType === UndefTokenType) {
    280                 property.name = property.name.trim();
    281                 state = FormatterWorker.CSSParserStates.PropertyValue;
    282             } else if (tokenType["property"]) {
    283                 property.name += tokenValue;
    284             }
    285             break;
    286         case FormatterWorker.CSSParserStates.PropertyValue:
    287             if (tokenValue === ";" && tokenType === UndefTokenType) {
    288                 property.value = property.value.trim();
    289                 rule.properties.push(property);
    290                 state = FormatterWorker.CSSParserStates.Style;
    291             } else if (tokenValue === "}" && tokenType === UndefTokenType) {
    292                 property.value = property.value.trim();
    293                 rule.properties.push(property);
    294                 rules.push(rule);
    295                 state = FormatterWorker.CSSParserStates.Initial;
    296             } else if (!tokenType["comment"]) {
    297                 property.value += tokenValue;
    298             }
    299             break;
    300         default:
    301             console.assert(false, "Unknown CSS parser state.");
    302         }
    303         processedChunkCharacters += newColumn - column;
    304         if (processedChunkCharacters > chunkSize) {
    305             postMessage({ chunk: rules, isLastChunk: false });
    306             rules = [];
    307             processedChunkCharacters = 0;
    308         }
    309     }
    310     var tokenizer = FormatterWorker.createTokenizer("text/css");
    311     var lineNumber;
    312     for (lineNumber = 0; lineNumber < lines.length; ++lineNumber) {
    313         var line = lines[lineNumber];
    314         tokenizer(line, processToken);
    315     }
    316     postMessage({ chunk: rules, isLastChunk: true });
    317 }
    318 
    319 /**
    320  * @param {string} content
    321  * @param {!{original: !Array.<number>, formatted: !Array.<number>}} mapping
    322  * @param {number} offset
    323  * @param {number} formattedOffset
    324  * @param {string} indentString
    325  * @return {string}
    326  */
    327 FormatterWorker._formatScript = function(content, mapping, offset, formattedOffset, indentString)
    328 {
    329     var formattedContent;
    330     try {
    331         var tokenizer = new FormatterWorker.JavaScriptTokenizer(content);
    332         var builder = new FormatterWorker.JavaScriptFormattedContentBuilder(tokenizer.content(), mapping, offset, formattedOffset, indentString);
    333         var formatter = new FormatterWorker.JavaScriptFormatter(tokenizer, builder);
    334         formatter.format();
    335         formattedContent = builder.content();
    336     } catch (e) {
    337         formattedContent = content;
    338     }
    339     return formattedContent;
    340 }
    341 
    342 /**
    343  * @param {string} content
    344  * @param {!{original: !Array.<number>, formatted: !Array.<number>}} mapping
    345  * @param {number} offset
    346  * @param {number} formattedOffset
    347  * @param {string} indentString
    348  * @return {string}
    349  */
    350 FormatterWorker._formatCSS = function(content, mapping, offset, formattedOffset, indentString)
    351 {
    352     var formattedContent;
    353     try {
    354         var builder = new FormatterWorker.CSSFormattedContentBuilder(content, mapping, offset, formattedOffset, indentString);
    355         var formatter = new FormatterWorker.CSSFormatter(content, builder);
    356         formatter.format();
    357         formattedContent = builder.content();
    358     } catch (e) {
    359         formattedContent = content;
    360     }
    361     return formattedContent;
    362 }
    363 
    364 /**
    365  * @constructor
    366  * @param {string} indentString
    367  */
    368 FormatterWorker.HTMLFormatter = function(indentString)
    369 {
    370     this._indentString = indentString;
    371 }
    372 
    373 FormatterWorker.HTMLFormatter.prototype = {
    374     /**
    375      * @param {string} content
    376      * @return {!{content: string, mapping: {original: !Array.<number>, formatted: !Array.<number>}}}
    377      */
    378     format: function(content)
    379     {
    380         this.line = content;
    381         this._content = content;
    382         this._formattedContent = "";
    383         this._mapping = { original: [0], formatted: [0] };
    384         this._position = 0;
    385 
    386         var scriptOpened = false;
    387         var styleOpened = false;
    388         var tokenizer = FormatterWorker.createTokenizer("text/html");
    389         var accumulatedTokenValue = "";
    390         var accumulatedTokenStart = 0;
    391 
    392         /**
    393          * @this {FormatterWorker.HTMLFormatter}
    394          */
    395         function processToken(tokenValue, tokenType, tokenStart, tokenEnd) {
    396             if (!tokenType)
    397                 return;
    398             var oldType = tokenType;
    399             tokenType = tokenType.split(" ").keySet();
    400             if (!tokenType["tag"])
    401                 return;
    402             if (tokenType["bracket"] && (tokenValue === "<" || tokenValue === "</")) {
    403                 accumulatedTokenValue = tokenValue;
    404                 accumulatedTokenStart = tokenStart;
    405                 return;
    406             }
    407             accumulatedTokenValue = accumulatedTokenValue + tokenValue.toLowerCase();
    408             if (accumulatedTokenValue === "<script") {
    409                 scriptOpened = true;
    410             } else if (scriptOpened && tokenValue === ">") {
    411                 scriptOpened = false;
    412                 this._scriptStarted(tokenEnd);
    413             } else if (accumulatedTokenValue === "</script") {
    414                 this._scriptEnded(accumulatedTokenStart);
    415             } else if (accumulatedTokenValue === "<style") {
    416                 styleOpened = true;
    417             } else if (styleOpened && tokenValue === ">") {
    418                 styleOpened = false;
    419                 this._styleStarted(tokenEnd);
    420             } else if (accumulatedTokenValue === "</style") {
    421                 this._styleEnded(accumulatedTokenStart);
    422             }
    423             accumulatedTokenValue = "";
    424         }
    425         tokenizer(content, processToken.bind(this));
    426 
    427         this._formattedContent += this._content.substring(this._position);
    428         return { content: this._formattedContent, mapping: this._mapping };
    429     },
    430 
    431     /**
    432      * @param {number} cursor
    433      */
    434     _scriptStarted: function(cursor)
    435     {
    436         this._handleSubFormatterStart(cursor);
    437     },
    438 
    439     /**
    440      * @param {number} cursor
    441      */
    442     _scriptEnded: function(cursor)
    443     {
    444         this._handleSubFormatterEnd(FormatterWorker._formatScript, cursor);
    445     },
    446 
    447     /**
    448      * @param {number} cursor
    449      */
    450     _styleStarted: function(cursor)
    451     {
    452         this._handleSubFormatterStart(cursor);
    453     },
    454 
    455     /**
    456      * @param {number} cursor
    457      */
    458     _styleEnded: function(cursor)
    459     {
    460         this._handleSubFormatterEnd(FormatterWorker._formatCSS, cursor);
    461     },
    462 
    463     /**
    464      * @param {number} cursor
    465      */
    466     _handleSubFormatterStart: function(cursor)
    467     {
    468         this._formattedContent += this._content.substring(this._position, cursor);
    469         this._formattedContent += "\n";
    470         this._position = cursor;
    471     },
    472 
    473     /**
    474      * @param {function(string, !{formatted: !Array.<number>, original: !Array.<number>}, number, number, string)} formatFunction
    475      * @param {number} cursor
    476      */
    477     _handleSubFormatterEnd: function(formatFunction, cursor)
    478     {
    479         if (cursor === this._position)
    480             return;
    481 
    482         var scriptContent = this._content.substring(this._position, cursor);
    483         this._mapping.original.push(this._position);
    484         this._mapping.formatted.push(this._formattedContent.length);
    485         var formattedScriptContent = formatFunction(scriptContent, this._mapping, this._position, this._formattedContent.length, this._indentString);
    486 
    487         this._formattedContent += formattedScriptContent;
    488         this._position = cursor;
    489     }
    490 }
    491 
    492 /**
    493  * @return {!Object}
    494  */
    495 function require()
    496 {
    497     return tokenizerHolder;
    498 }
    499 
    500 /**
    501  * @type {!{tokenizer}}
    502  */
    503 var exports = { tokenizer: null };
    504 var tokenizerHolder = exports;
    505