Home | History | Annotate | Download | only in bench
      1 #!/usr/bin/env python
      2 
      3 # This code is originally from jsmin by Douglas Crockford; it was translated to
      4 # Python by Baruch Even. The original code had the following copyright and
      5 # license.
      6 #
      7 # /* jsmin.c
      8 #    2007-05-22
      9 #
     10 # Copyright (c) 2002 Douglas Crockford  (www.crockford.com)
     11 #
     12 # Permission is hereby granted, free of charge, to any person obtaining a copy of
     13 # this software and associated documentation files (the "Software"), to deal in
     14 # the Software without restriction, including without limitation the rights to
     15 # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
     16 # of the Software, and to permit persons to whom the Software is furnished to do
     17 # so, subject to the following conditions:
     18 #
     19 # The above copyright notice and this permission notice shall be included in all
     20 # copies or substantial portions of the Software.
     21 #
     22 # The Software shall be used for Good, not Evil.
     23 #
     24 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     25 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     26 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     27 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     28 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     29 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     30 # SOFTWARE.
     31 # */
     32 
     33 # imports adjusted for speed (cStringIO) and python 3 (io) -- nd
     34 try:
     35     from cStringIO import StringIO
     36 except ImportError:
     37     try:
     38         from StringIO import StringIO
     39     except ImportError:
     40         from io import StringIO
     41 
     42 
     43 def jsmin(js):
     44     ins = StringIO(js)
     45     outs = StringIO()
     46     JavascriptMinify().minify(ins, outs)
     47     str = outs.getvalue()
     48     if len(str) > 0 and str[0] == '\n':
     49         str = str[1:]
     50     return str
     51 
     52 def isAlphanum(c):
     53     """return true if the character is a letter, digit, underscore,
     54            dollar sign, or non-ASCII character.
     55     """
     56     return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or
     57             (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126));
     58 
     59 class UnterminatedComment(Exception):
     60     pass
     61 
     62 class UnterminatedStringLiteral(Exception):
     63     pass
     64 
     65 class UnterminatedRegularExpression(Exception):
     66     pass
     67 
     68 class JavascriptMinify(object):
     69 
     70     def _outA(self):
     71         self.outstream.write(self.theA)
     72     def _outB(self):
     73         self.outstream.write(self.theB)
     74 
     75     def _get(self):
     76         """return the next character from stdin. Watch out for lookahead. If
     77            the character is a control character, translate it to a space or
     78            linefeed.
     79         """
     80         c = self.theLookahead
     81         self.theLookahead = None
     82         if c == None:
     83             c = self.instream.read(1)
     84         if c >= ' ' or c == '\n':
     85             return c
     86         if c == '': # EOF
     87             return '\000'
     88         if c == '\r':
     89             return '\n'
     90         return ' '
     91 
     92     def _peek(self):
     93         self.theLookahead = self._get()
     94         return self.theLookahead
     95 
     96     def _next(self):
     97         """get the next character, excluding comments. peek() is used to see
     98            if an unescaped '/' is followed by a '/' or '*'.
     99         """
    100         c = self._get()
    101         if c == '/' and self.theA != '\\':
    102             p = self._peek()
    103             if p == '/':
    104                 c = self._get()
    105                 while c > '\n':
    106                     c = self._get()
    107                 return c
    108             if p == '*':
    109                 c = self._get()
    110                 while 1:
    111                     c = self._get()
    112                     if c == '*':
    113                         if self._peek() == '/':
    114                             self._get()
    115                             return ' '
    116                     if c == '\000':
    117                         raise UnterminatedComment()
    118 
    119         return c
    120 
    121     def _action(self, action):
    122         """do something! What you do is determined by the argument:
    123            1   Output A. Copy B to A. Get the next B.
    124            2   Copy B to A. Get the next B. (Delete A).
    125            3   Get the next B. (Delete B).
    126            action treats a string as a single character. Wow!
    127            action recognizes a regular expression if it is preceded by ( or , or =.
    128         """
    129         if action <= 1:
    130             self._outA()
    131 
    132         if action <= 2:
    133             self.theA = self.theB
    134             if self.theA == "'" or self.theA == '"':
    135                 while 1:
    136                     self._outA()
    137                     self.theA = self._get()
    138                     if self.theA == self.theB:
    139                         break
    140                     if self.theA <= '\n':
    141                         raise UnterminatedStringLiteral()
    142                     if self.theA == '\\':
    143                         self._outA()
    144                         self.theA = self._get()
    145 
    146 
    147         if action <= 3:
    148             self.theB = self._next()
    149             if self.theB == '/' and (self.theA == '(' or self.theA == ',' or
    150                                      self.theA == '=' or self.theA == ':' or
    151                                      self.theA == '[' or self.theA == '?' or
    152                                      self.theA == '!' or self.theA == '&' or
    153                                      self.theA == '|' or self.theA == ';' or
    154                                      self.theA == '{' or self.theA == '}' or
    155                                      self.theA == '\n'):
    156                 self._outA()
    157                 self._outB()
    158                 while 1:
    159                     self.theA = self._get()
    160                     if self.theA == '/':
    161                         break
    162                     elif self.theA == '\\':
    163                         self._outA()
    164                         self.theA = self._get()
    165                     elif self.theA <= '\n':
    166                         raise UnterminatedRegularExpression()
    167                     self._outA()
    168                 self.theB = self._next()
    169 
    170 
    171     def _jsmin(self):
    172         """Copy the input to the output, deleting the characters which are
    173            insignificant to JavaScript. Comments will be removed. Tabs will be
    174            replaced with spaces. Carriage returns will be replaced with linefeeds.
    175            Most spaces and linefeeds will be removed.
    176         """
    177         self.theA = '\n'
    178         self._action(3)
    179 
    180         while self.theA != '\000':
    181             if self.theA == ' ':
    182                 if isAlphanum(self.theB):
    183                     self._action(1)
    184                 else:
    185                     self._action(2)
    186             elif self.theA == '\n':
    187                 if self.theB in ['{', '[', '(', '+', '-']:
    188                     self._action(1)
    189                 elif self.theB == ' ':
    190                     self._action(3)
    191                 else:
    192                     if isAlphanum(self.theB):
    193                         self._action(1)
    194                     else:
    195                         self._action(2)
    196             else:
    197                 if self.theB == ' ':
    198                     if isAlphanum(self.theA):
    199                         self._action(1)
    200                     else:
    201                         self._action(3)
    202                 elif self.theB == '\n':
    203                     if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:
    204                         self._action(1)
    205                     else:
    206                         if isAlphanum(self.theA):
    207                             self._action(1)
    208                         else:
    209                             self._action(3)
    210                 else:
    211                     self._action(1)
    212 
    213     def minify(self, instream, outstream):
    214         self.instream = instream
    215         self.outstream = outstream
    216         self.theA = '\n'
    217         self.theB = None
    218         self.theLookahead = None
    219 
    220         self._jsmin()
    221         self.instream.close()
    222 
    223 if __name__ == '__main__':
    224     import sys
    225     jsm = JavascriptMinify()
    226     jsm.minify(sys.stdin, sys.stdout)
    227