#!/usr/bin/python2.4

# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it.  Another strangeness is that
it accepts $ and % as parts of identifiers.  It doesn't merge lines or strip
out blank lines in order to ease debugging.  Variables at the top scope are
properties of the global object so we can't rename them.  It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Use as:
import jsmin
minifier = jsmin.JavaScriptMinifier()
program1 = minifier.JSMinify(program1)
program2 = minifier.JSMinify(program2)
"""

import re


class JavaScriptMinifier(object):
    """An object that you can feed code snippets to to get them minified.

    The instance keeps state between calls to JSMinify (identifiers seen at
    the top level, whether we are inside a block comment, the brace nesting
    depth and the renaming table of the current scope), so several snippets
    can be fed to the same object one after the other.
    """

    # A regexp that matches a literal string surrounded by "double quotes".
    # This regexp can handle embedded backslash-escaped characters including
    # embedded backslash-escaped double quotes.
    _DOUBLE_QUOTED_STRING = r'"(?:[^"\\]|\\.)*"'
    # A regexp that matches a literal string surrounded by 'single quotes'.
    _SINGLE_QUOTED_STRING = r"'(?:[^'\\]|\\.)*'"
    # A regexp that matches a template string.
    _TEMPLATE_STRING = r"`(?:[^`\\]|\\.)*`"
    # A regexp that matches a regexp literal surrounded by /slashes/.
    # Don't allow a regexp to have a ) before the first ( since that's a
    # syntax error and it's probably just two unrelated slashes.
    # Also don't allow it to come after anything that can only be the
    # end of a primary expression.
    _SLASH_QUOTED_REGEXP = (
        r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/")

    # The literal alternatives come first in every combined pattern so that
    # the contents of strings and regexps are consumed as one match and are
    # never rewritten piecemeal.
    _LITERALS = [_DOUBLE_QUOTED_STRING,
                 _SINGLE_QUOTED_STRING,
                 _TEMPLATE_STRING,
                 _SLASH_QUOTED_REGEXP]

    # Runs of spaces; capture 1 holds the single space that replaces the run.
    _MULTIPLE_SPACES_RE = re.compile("|".join(_LITERALS + ["( )+"]))
    # Single spaces that do not have an identifier character both before and
    # after them.  % and $ are counted as identifier characters.
    _SINGLE_SPACE_RE = re.compile("|".join(
        _LITERALS + [r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]))

    _FUNCTION_DECLARATION = (
        r"\bfunction"        # Function definition keyword...
        r"( [\w$%]+)?"       # ...optional function name...
        r"\([\w$%,]+\)\{")   # ...argument declarations.
    # Variable use.  Cannot follow a period.
    _VARIABLE_USE = r"(?<![.\w$%])[\w$%]+"
    _DECLARATION_ALTERNATIVES = _LITERALS + [
        r"\{",               # Curly braces.
        r"\}",
        r"\bvar [\w$%,]+",   # var declarations.
        _FUNCTION_DECLARATION]
    # Default form: a variable use cannot precede a colon, because in the
    # keyword-value syntax { key:value } the key is a literal string.
    _DECLARATION_RE = re.compile("|".join(
        _DECLARATION_ALTERNATIVES + [_VARIABLE_USE + r"(?![:\w$%])"]))
    # Form used on lines containing a question mark, where a colon is taken
    # to belong to the ternary operator: "condition ? iftrue : iffalse".
    _DECLARATION_TERNARY_RE = re.compile("|".join(
        _DECLARATION_ALTERNATIVES + [_VARIABLE_USE]))

    # Patterns identifying a match that is a complete literal.
    _WHOLE_LITERAL_PATTERNS = (r"'.*'$", r'".*"$', r"`.*`$", r"/.+/$")

    def __init__(self):
        # We prepopulate the list of identifiers that shouldn't be used.
        # These short language keywords could otherwise be used by the script
        # as variable names.  Every two-letter keyword has to be listed since
        # the generator in FindNewName produces all two-letter combinations.
        self.seen_identifiers = {"do": True, "if": True, "in": True}
        self.identifier_counter = 0
        self.in_comment = False
        self.map = {}
        self.nesting = 0

    def LookAtIdentifier(self, m):
        """Records identifiers or keywords that we see in use.

        (So we can avoid renaming variables to these strings.)

        Args:
          m: The match object returned by re.search.

        Returns:
          Nothing.
        """
        identifier = m.group(1)
        self.seen_identifiers[identifier] = True

    def Push(self):
        """Called when we encounter a '{'."""
        self.nesting += 1

    def Pop(self):
        """Called when we encounter a '}'."""
        self.nesting -= 1
        # We treat each top-level opening brace as a single scope that can
        # span several sets of nested braces.
        if self.nesting == 0:
            self.map = {}
            self.identifier_counter = 0

    def _RenameInterpolation(self, m):
        """Rewrites one ${name} interpolation of a template string.

        The name is a variable use, not a declaration, so it is only looked
        up in the renaming table of the current scope.  Names that were never
        declared in this scope (for instance globals) are left untouched.

        Args:
          m: The match object for the interpolation; group 1 is the name.

        Returns:
          The interpolation with the name replaced where a mapping exists.
        """
        name = m.group(1)
        return "${" + self.map.get(name, name) + "}"

    def _RenameDeclared(self, names):
        """Renames a comma-separated list of declared names.

        Args:
          names: The list of names produced by splitting on commas.

        Returns:
          The renamed names joined with commas.
        """
        renamed = []
        for name in names:
            # An empty entry comes from a stray or trailing comma (e.g. a var
            # declaration continued on the next line).  It is not a variable,
            # so it must not be given a name.
            if name:
                name = self.FindNewName(name)
            renamed.append(name)
        return ",".join(renamed)

    def Declaration(self, m):
        """Rewrites bits of the program selected by a regexp.

        These can be curly braces, literal strings, function declarations and
        var declarations.  (These last two must be on one line including the
        opening curly brace of the function for their variables to be
        renamed).

        Args:
          m: The match object returned by re.search.

        Returns:
          The string that should replace the match in the rewritten program.
        """
        matched_text = m.group(0)

        # Template strings: only the ${name} interpolations are rewritten.
        if matched_text.startswith("`") and matched_text.endswith("`"):
            return re.sub(r"\$\{([\w$%]+)\}",
                          self._RenameInterpolation,
                          matched_text)

        # Braces only change the nesting depth.
        if matched_text == "{":
            self.Push()
            return matched_text
        if matched_text == "}":
            self.Pop()
            return matched_text

        # Quoted strings and regexp literals pass through unchanged.
        if re.match("[\"'/]", matched_text):
            return matched_text

        var_match = re.match(r"var ", matched_text)
        if var_match:
            var_names = matched_text[var_match.end():].split(",")
            return "var " + self._RenameDeclared(var_names)

        function_match = re.match(r"(function\b[^(]*)\((.*)\)\{$",
                                  matched_text)
        if function_match:
            up_to_args = function_match.group(1)
            args = function_match.group(2).split(",")
            # The match includes the opening brace of the function body, so
            # the scope is entered before the arguments are named.
            self.Push()
            return up_to_args + "(" + self._RenameDeclared(args) + "){"

        # Anything else is a plain identifier: substitute it if it was
        # renamed in the current scope.
        return self.map.get(matched_text, matched_text)

    def CharFromNumber(self, number):
        """A single-digit base-52 encoding using a-zA-Z."""
        if number < 26:
            return chr(number + 97)
        number -= 26
        return chr(number + 65)

    def FindNewName(self, var_name):
        """Finds a new 1-character or 2-character name for a variable.

        Enters it into the mapping table for this scope.

        NOTE(review): only one- and two-character names are generated; once
        identifier_counter reaches 52 + 52 * 52 the leading character falls
        outside a-zA-Z.

        Args:
          var_name: The name of the variable before renaming.

        Returns:
          The new name of the variable.
        """
        # A variable that was already renamed in this scope keeps its name.
        if var_name in self.map:
            return self.map[var_name]
        # Variables declared outside any braces are properties of the global
        # object, so they can be visible from code in a different scope.  We
        # leave them alone.
        if self.nesting == 0:
            return var_name
        # Do not rename arguments object.
        if var_name == 'arguments':
            return 'arguments'
        while True:
            identifier_first_char = self.identifier_counter % 52
            identifier_second_char = self.identifier_counter // 52
            new_identifier = self.CharFromNumber(identifier_first_char)
            if identifier_second_char != 0:
                new_identifier = (
                    self.CharFromNumber(identifier_second_char - 1) +
                    new_identifier)
            self.identifier_counter += 1
            # Skip keywords and names already in use at the top level.
            if new_identifier not in self.seen_identifiers:
                break

        self.map[var_name] = new_identifier
        return new_identifier

    def RemoveSpaces(self, m):
        """Returns literal strings unchanged, replaces other inputs.

        Other inputs are replaced with the contents of capture 1.  This is
        either a single space or an empty string.

        Args:
          m: The match object returned by re.search.

        Returns:
          The string that should be inserted instead of the matched text.
        """
        entire_match = m.group(0)
        for literal_pattern in self._WHOLE_LITERAL_PATTERNS:
            if re.match(literal_pattern, entire_match):
                return entire_match
        replacement = m.group(1)
        # Capture 1 does not take part in every alternative; an unset capture
        # means the matched space is simply dropped.
        if replacement is None:
            return ""
        return replacement

    def JSMinify(self, text):
        """The main entry point.  Takes a text and returns a compressed version.

        The compressed version hopefully does the same thing.  Line breaks
        are preserved.

        Args:
          text: The text of the code snippet as a multiline string.

        Returns:
          The compressed text of the code snippet as a multiline string.
        """
        new_lines = []
        for line in text.split("\n"):
            line = line.replace("\t", " ")
            if self.in_comment:
                m = re.search(r"\*/", line)
                if not m:
                    # The whole line is inside a block comment; keep an empty
                    # line so that line numbers stay the same.
                    new_lines.append("")
                    continue
                line = line[m.end():]
                self.in_comment = False

            # Remove comments that end on this line, then detect a block
            # comment that stays open past the end of the line.
            line = re.sub(r"/\*.*?\*/", " ", line)
            line = re.sub(r"//.*", "", line)
            m = re.search(r"/\*", line)
            if m:
                line = line[:m.start()]
                self.in_comment = True

            # Strip leading and trailing spaces.
            line = line.strip(" ")
            # Replace multiple spaces with a single space.
            line = self._MULTIPLE_SPACES_RE.sub(self.RemoveSpaces, line)
            # Strip single spaces unless they have an identifier character
            # both before and after the space.
            line = self._SINGLE_SPACE_RE.sub(self.RemoveSpaces, line)
            # Collect keywords and identifiers that are already in use.
            if self.nesting == 0:
                for m in re.finditer(r"([a-zA-Z0-9_$%]+)", line):
                    self.LookAtIdentifier(m)
            # Unfortunately the keyword-value syntax { key:value } makes the
            # key look like a variable where in fact it is a literal string.
            # We use the presence or absence of a question mark to try to
            # distinguish between this case and the ternary operator.
            if "?" in line:
                declaration_re = self._DECLARATION_TERNARY_RE
            else:
                declaration_re = self._DECLARATION_RE
            line = declaration_re.sub(self.Declaration, line)
            new_lines.append(line)

        return "\n".join(new_lines) + "\n"