1 /* 2 * Copyright (C) 2011 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 FormatterWorker = { 31 /** 32 * @param {string} mimeType 33 * @return {function(string, function(string, ?string, number, number))} 34 */ 35 createTokenizer: function(mimeType) 36 { 37 var mode = CodeMirror.getMode({indentUnit: 2}, mimeType); 38 var state = CodeMirror.startState(mode); 39 function tokenize(line, callback) 40 { 41 var stream = new CodeMirror.StringStream(line); 42 while (!stream.eol()) { 43 var style = mode.token(stream, state); 44 var value = stream.current(); 45 callback(value, style, stream.start, stream.start + value.length); 46 stream.start = stream.pos; 47 } 48 } 49 return tokenize; 50 } 51 }; 52 53 /** 54 * @typedef {{indentString: string, content: string, mimeType: string}} 55 */ 56 var FormatterParameters; 57 58 var onmessage = function(event) { 59 var data = /** @type !{method: string, params: !FormatterParameters} */ (event.data); 60 if (!data.method) 61 return; 62 63 FormatterWorker[data.method](data.params); 64 }; 65 66 /** 67 * @param {!FormatterParameters} params 68 */ 69 FormatterWorker.format = function(params) 70 { 71 // Default to a 4-space indent. 72 var indentString = params.indentString || " "; 73 var result = {}; 74 75 if (params.mimeType === "text/html") { 76 var formatter = new FormatterWorker.HTMLFormatter(indentString); 77 result = formatter.format(params.content); 78 } else if (params.mimeType === "text/css") { 79 result.mapping = { original: [0], formatted: [0] }; 80 result.content = FormatterWorker._formatCSS(params.content, result.mapping, 0, 0, indentString); 81 } else { 82 result.mapping = { original: [0], formatted: [0] }; 83 result.content = FormatterWorker._formatScript(params.content, result.mapping, 0, 0, indentString); 84 } 85 postMessage(result); 86 } 87 88 /** 89 * @param {number} totalLength 90 * @param {number} chunkSize 91 */ 92 FormatterWorker._chunkCount = function(totalLength, chunkSize) 93 { 94 if (totalLength <= chunkSize) 95 return 1; 96 97 var remainder = totalLength % chunkSize; 98 var partialLength = totalLength - remainder; 99 return (partialLength / chunkSize) + (remainder ? 1 : 0); 100 } 101 102 /** 103 * @param {!Object} params 104 */ 105 FormatterWorker.javaScriptOutline = function(params) 106 { 107 var chunkSize = 100000; // characters per data chunk 108 var totalLength = params.content.length; 109 var lines = params.content.split("\n"); 110 var chunkCount = FormatterWorker._chunkCount(totalLength, chunkSize); 111 var outlineChunk = []; 112 var previousIdentifier = null; 113 var previousToken = null; 114 var previousTokenType = null; 115 var currentChunk = 1; 116 var processedChunkCharacters = 0; 117 var addedFunction = false; 118 var isReadingArguments = false; 119 var argumentsText = ""; 120 var currentFunction = null; 121 var tokenizer = FormatterWorker.createTokenizer("text/javascript"); 122 for (var i = 0; i < lines.length; ++i) { 123 var line = lines[i]; 124 tokenizer(line, processToken); 125 } 126 127 /** 128 * @param {?string} tokenType 129 * @return {boolean} 130 */ 131 function isJavaScriptIdentifier(tokenType) 132 { 133 if (!tokenType) 134 return false; 135 return tokenType.startsWith("variable") || tokenType.startsWith("property") || tokenType === "def"; 136 } 137 138 /** 139 * @param {string} tokenValue 140 * @param {?string} tokenType 141 * @param {number} column 142 * @param {number} newColumn 143 */ 144 function processToken(tokenValue, tokenType, column, newColumn) 145 { 146 if (tokenType === "property" && previousTokenType === "property" && (previousToken === "get" || previousToken === "set")) { 147 currentFunction = { line: i, column: column, name: previousToken + " " + tokenValue }; 148 addedFunction = true; 149 previousIdentifier = null; 150 } else if (isJavaScriptIdentifier(tokenType)) { 151 previousIdentifier = tokenValue; 152 if (tokenValue && previousToken === "function") { 153 // A named function: "function f...". 154 currentFunction = { line: i, column: column, name: tokenValue }; 155 addedFunction = true; 156 previousIdentifier = null; 157 } 158 } else if (tokenType === "keyword") { 159 if (tokenValue === "function") { 160 if (previousIdentifier && (previousToken === "=" || previousToken === ":")) { 161 // Anonymous function assigned to an identifier: "...f = function..." 162 // or "funcName: function...". 163 currentFunction = { line: i, column: column, name: previousIdentifier }; 164 addedFunction = true; 165 previousIdentifier = null; 166 } 167 } 168 } else if (tokenValue === "." && isJavaScriptIdentifier(previousTokenType)) 169 previousIdentifier += "."; 170 else if (tokenValue === "(" && addedFunction) 171 isReadingArguments = true; 172 if (isReadingArguments && tokenValue) 173 argumentsText += tokenValue; 174 175 if (tokenValue === ")" && isReadingArguments) { 176 addedFunction = false; 177 isReadingArguments = false; 178 currentFunction.arguments = argumentsText.replace(/,[\r\n\s]*/g, ", ").replace(/([^,])[\r\n\s]+/g, "$1"); 179 argumentsText = ""; 180 outlineChunk.push(currentFunction); 181 } 182 183 if (tokenValue.trim().length) { 184 // Skip whitespace tokens. 185 previousToken = tokenValue; 186 previousTokenType = tokenType; 187 } 188 processedChunkCharacters += newColumn - column; 189 190 if (processedChunkCharacters >= chunkSize) { 191 postMessage({ chunk: outlineChunk, total: chunkCount, index: currentChunk++ }); 192 outlineChunk = []; 193 processedChunkCharacters = 0; 194 } 195 } 196 197 postMessage({ chunk: outlineChunk, total: chunkCount, index: chunkCount }); 198 } 199 200 FormatterWorker.CSSParserStates = { 201 Initial: "Initial", 202 Selector: "Selector", 203 Style: "Style", 204 PropertyName: "PropertyName", 205 PropertyValue: "PropertyValue", 206 AtRule: "AtRule", 207 }; 208 209 FormatterWorker.parseCSS = function(params) 210 { 211 var chunkSize = 100000; // characters per data chunk 212 var lines = params.content.split("\n"); 213 var rules = []; 214 var processedChunkCharacters = 0; 215 216 var state = FormatterWorker.CSSParserStates.Initial; 217 var rule; 218 var property; 219 var UndefTokenType = {}; 220 221 /** 222 * @param {string} tokenValue 223 * @param {?string} tokenTypes 224 * @param {number} column 225 * @param {number} newColumn 226 */ 227 function processToken(tokenValue, tokenTypes, column, newColumn) 228 { 229 var tokenType = tokenTypes ? tokenTypes.split(" ").keySet() : UndefTokenType; 230 switch (state) { 231 case FormatterWorker.CSSParserStates.Initial: 232 if (tokenType["qualifier"] || tokenType["builtin"] || tokenType["tag"]) { 233 rule = { 234 selectorText: tokenValue, 235 lineNumber: lineNumber, 236 columNumber: column, 237 properties: [], 238 }; 239 state = FormatterWorker.CSSParserStates.Selector; 240 } else if (tokenType["def"]) { 241 rule = { 242 atRule: tokenValue, 243 lineNumber: lineNumber, 244 columNumber: column, 245 }; 246 state = FormatterWorker.CSSParserStates.AtRule; 247 } 248 break; 249 case FormatterWorker.CSSParserStates.Selector: 250 if (tokenValue === "{" && tokenType === UndefTokenType) { 251 rule.selectorText = rule.selectorText.trim(); 252 state = FormatterWorker.CSSParserStates.Style; 253 } else { 254 rule.selectorText += tokenValue; 255 } 256 break; 257 case FormatterWorker.CSSParserStates.AtRule: 258 if ((tokenValue === ";" || tokenValue === "{") && tokenType === UndefTokenType) { 259 rule.atRule = rule.atRule.trim(); 260 rules.push(rule); 261 state = FormatterWorker.CSSParserStates.Initial; 262 } else { 263 rule.atRule += tokenValue; 264 } 265 break; 266 case FormatterWorker.CSSParserStates.Style: 267 if (tokenType["meta"] || tokenType["property"]) { 268 property = { 269 name: tokenValue, 270 value: "", 271 }; 272 state = FormatterWorker.CSSParserStates.PropertyName; 273 } else if (tokenValue === "}" && tokenType === UndefTokenType) { 274 rules.push(rule); 275 state = FormatterWorker.CSSParserStates.Initial; 276 } 277 break; 278 case FormatterWorker.CSSParserStates.PropertyName: 279 if (tokenValue === ":" && tokenType === UndefTokenType) { 280 property.name = property.name.trim(); 281 state = FormatterWorker.CSSParserStates.PropertyValue; 282 } else if (tokenType["property"]) { 283 property.name += tokenValue; 284 } 285 break; 286 case FormatterWorker.CSSParserStates.PropertyValue: 287 if (tokenValue === ";" && tokenType === UndefTokenType) { 288 property.value = property.value.trim(); 289 rule.properties.push(property); 290 state = FormatterWorker.CSSParserStates.Style; 291 } else if (tokenValue === "}" && tokenType === UndefTokenType) { 292 property.value = property.value.trim(); 293 rule.properties.push(property); 294 rules.push(rule); 295 state = FormatterWorker.CSSParserStates.Initial; 296 } else if (!tokenType["comment"]) { 297 property.value += tokenValue; 298 } 299 break; 300 default: 301 console.assert(false, "Unknown CSS parser state."); 302 } 303 processedChunkCharacters += newColumn - column; 304 if (processedChunkCharacters > chunkSize) { 305 postMessage({ chunk: rules, isLastChunk: false }); 306 rules = []; 307 processedChunkCharacters = 0; 308 } 309 } 310 var tokenizer = FormatterWorker.createTokenizer("text/css"); 311 var lineNumber; 312 for (lineNumber = 0; lineNumber < lines.length; ++lineNumber) { 313 var line = lines[lineNumber]; 314 tokenizer(line, processToken); 315 } 316 postMessage({ chunk: rules, isLastChunk: true }); 317 } 318 319 /** 320 * @param {string} content 321 * @param {!{original: !Array.<number>, formatted: !Array.<number>}} mapping 322 * @param {number} offset 323 * @param {number} formattedOffset 324 * @param {string} indentString 325 * @return {string} 326 */ 327 FormatterWorker._formatScript = function(content, mapping, offset, formattedOffset, indentString) 328 { 329 var formattedContent; 330 try { 331 var tokenizer = new FormatterWorker.JavaScriptTokenizer(content); 332 var builder = new FormatterWorker.JavaScriptFormattedContentBuilder(tokenizer.content(), mapping, offset, formattedOffset, indentString); 333 var formatter = new FormatterWorker.JavaScriptFormatter(tokenizer, builder); 334 formatter.format(); 335 formattedContent = builder.content(); 336 } catch (e) { 337 formattedContent = content; 338 } 339 return formattedContent; 340 } 341 342 /** 343 * @param {string} content 344 * @param {!{original: !Array.<number>, formatted: !Array.<number>}} mapping 345 * @param {number} offset 346 * @param {number} formattedOffset 347 * @param {string} indentString 348 * @return {string} 349 */ 350 FormatterWorker._formatCSS = function(content, mapping, offset, formattedOffset, indentString) 351 { 352 var formattedContent; 353 try { 354 var builder = new FormatterWorker.CSSFormattedContentBuilder(content, mapping, offset, formattedOffset, indentString); 355 var formatter = new FormatterWorker.CSSFormatter(content, builder); 356 formatter.format(); 357 formattedContent = builder.content(); 358 } catch (e) { 359 formattedContent = content; 360 } 361 return formattedContent; 362 } 363 364 /** 365 * @constructor 366 * @param {string} indentString 367 */ 368 FormatterWorker.HTMLFormatter = function(indentString) 369 { 370 this._indentString = indentString; 371 } 372 373 FormatterWorker.HTMLFormatter.prototype = { 374 /** 375 * @param {string} content 376 * @return {!{content: string, mapping: {original: !Array.<number>, formatted: !Array.<number>}}} 377 */ 378 format: function(content) 379 { 380 this.line = content; 381 this._content = content; 382 this._formattedContent = ""; 383 this._mapping = { original: [0], formatted: [0] }; 384 this._position = 0; 385 386 var scriptOpened = false; 387 var styleOpened = false; 388 var tokenizer = FormatterWorker.createTokenizer("text/html"); 389 var accumulatedTokenValue = ""; 390 var accumulatedTokenStart = 0; 391 392 /** 393 * @this {FormatterWorker.HTMLFormatter} 394 */ 395 function processToken(tokenValue, tokenType, tokenStart, tokenEnd) { 396 if (!tokenType) 397 return; 398 var oldType = tokenType; 399 tokenType = tokenType.split(" ").keySet(); 400 if (!tokenType["tag"]) 401 return; 402 if (tokenType["bracket"] && (tokenValue === "<" || tokenValue === "</")) { 403 accumulatedTokenValue = tokenValue; 404 accumulatedTokenStart = tokenStart; 405 return; 406 } 407 accumulatedTokenValue = accumulatedTokenValue + tokenValue.toLowerCase(); 408 if (accumulatedTokenValue === "<script") { 409 scriptOpened = true; 410 } else if (scriptOpened && tokenValue === ">") { 411 scriptOpened = false; 412 this._scriptStarted(tokenEnd); 413 } else if (accumulatedTokenValue === "</script") { 414 this._scriptEnded(accumulatedTokenStart); 415 } else if (accumulatedTokenValue === "<style") { 416 styleOpened = true; 417 } else if (styleOpened && tokenValue === ">") { 418 styleOpened = false; 419 this._styleStarted(tokenEnd); 420 } else if (accumulatedTokenValue === "</style") { 421 this._styleEnded(accumulatedTokenStart); 422 } 423 accumulatedTokenValue = ""; 424 } 425 tokenizer(content, processToken.bind(this)); 426 427 this._formattedContent += this._content.substring(this._position); 428 return { content: this._formattedContent, mapping: this._mapping }; 429 }, 430 431 /** 432 * @param {number} cursor 433 */ 434 _scriptStarted: function(cursor) 435 { 436 this._handleSubFormatterStart(cursor); 437 }, 438 439 /** 440 * @param {number} cursor 441 */ 442 _scriptEnded: function(cursor) 443 { 444 this._handleSubFormatterEnd(FormatterWorker._formatScript, cursor); 445 }, 446 447 /** 448 * @param {number} cursor 449 */ 450 _styleStarted: function(cursor) 451 { 452 this._handleSubFormatterStart(cursor); 453 }, 454 455 /** 456 * @param {number} cursor 457 */ 458 _styleEnded: function(cursor) 459 { 460 this._handleSubFormatterEnd(FormatterWorker._formatCSS, cursor); 461 }, 462 463 /** 464 * @param {number} cursor 465 */ 466 _handleSubFormatterStart: function(cursor) 467 { 468 this._formattedContent += this._content.substring(this._position, cursor); 469 this._formattedContent += "\n"; 470 this._position = cursor; 471 }, 472 473 /** 474 * @param {function(string, !{formatted: !Array.<number>, original: !Array.<number>}, number, number, string)} formatFunction 475 * @param {number} cursor 476 */ 477 _handleSubFormatterEnd: function(formatFunction, cursor) 478 { 479 if (cursor === this._position) 480 return; 481 482 var scriptContent = this._content.substring(this._position, cursor); 483 this._mapping.original.push(this._position); 484 this._mapping.formatted.push(this._formattedContent.length); 485 var formattedScriptContent = formatFunction(scriptContent, this._mapping, this._position, this._formattedContent.length, this._indentString); 486 487 this._formattedContent += formattedScriptContent; 488 this._position = cursor; 489 } 490 } 491 492 /** 493 * @return {!Object} 494 */ 495 function require() 496 { 497 return tokenizerHolder; 498 } 499 500 /** 501 * @type {!{tokenizer}} 502 */ 503 var exports = { tokenizer: null }; 504 var tokenizerHolder = exports; 505