Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2011 - 2013
      4 # Andr\xe9 Malo or his licensors, as applicable
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #     http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 r"""
     18 =====================
     19  Javascript Minifier
     20 =====================
     21 
     22 rJSmin is a javascript minifier written in python.
     23 
     24 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\.
     25 
     26 The module is a re-implementation aiming for speed, so it can be used at
     27 runtime (rather than during a preprocessing step). Usually it produces the
     28 same results as the original ``jsmin.c``. It differs in the following ways:
     29 
     30 - there is no error detection: unterminated string, regex and comment
     31   literals are treated as regular javascript code and minified as such.
     32 - Control characters inside string and regex literals are left untouched; they
     33   are not converted to spaces (nor to \n)
     34 - Newline characters are not allowed inside string and regex literals, except
     35   for line continuations in string literals (ECMA-5).
     36 - "return /regex/" is recognized correctly.
     37 - "+ +" and "- -" sequences are not collapsed to '++' or '--'
     38 - Newlines before ! operators are removed more sensibly
     39 - rJSmin does not handle streams, but only complete strings. (However, the
     40   module provides a "streamy" interface).
     41 
     42 Since most parts of the logic are handled by the regex engine it's way
     43 faster than the original python port of ``jsmin.c`` by Baruch Even. The speed
     44 factor varies between about 6 and 55 depending on input and python version
     45 (it gets faster the more compressed the input already is). Compared to the
     46 speed-refactored python port by Dave St.Germain the performance gain is less
     47 dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for
     48 details.
     49 
     50 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
     51 
     52 Both python 2 and python 3 are supported.
     53 
     54 .. _jsmin.c by Douglas Crockford:
     55    http://www.crockford.com/javascript/jsmin.c
     56 """
__author__ = "Andr\xe9 Malo"
# Normalize the author name to text: if the string type offers ``decode``
# (python 2 byte strings) it is decoded from latin-1; otherwise the fallback
# lambda ignores its argument and returns the string unchanged.
__author__ = getattr(__author__, 'decode', lambda x: __author__)('latin-1')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.7'
# ``jsmin`` is the only name exported by ``from ... import *``
__all__ = ['jsmin']
     63 
     64 import re as _re
     65 
     66 
     67 def _make_jsmin(python_only=False):
     68     """
     69     Generate JS minifier based on `jsmin.c by Douglas Crockford`_
     70 
     71     .. _jsmin.c by Douglas Crockford:
     72        http://www.crockford.com/javascript/jsmin.c
     73 
     74     :Parameters:
     75       `python_only` : ``bool``
     76         Use only the python variant. If true, the c extension is not even
     77         tried to be loaded.
     78 
     79     :Return: Minifier
     80     :Rtype: ``callable``
     81     """
     82     # pylint: disable = R0912, R0914, W0612
     83     if not python_only:
     84         try:
     85             import _rjsmin
     86         except ImportError:
     87             pass
     88         else:
     89             return _rjsmin.jsmin
     90     try:
     91         xrange
     92     except NameError:
     93         xrange = range  # pylint: disable = W0622
     94 
     95     space_chars = r'[\000-\011\013\014\016-\040]'
     96 
     97     line_comment = r'(?://[^\r\n]*)'
     98     space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
     99     string1 = \
    100         r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    101     string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    102     strings = r'(?:%s|%s)' % (string1, string2)
    103 
    104     charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    105     nospecial = r'[^/\\\[\r\n]'
    106     regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
    107         nospecial, charclass, nospecial)
    108     space = r'(?:%s|%s)' % (space_chars, space_comment)
    109     newline = r'(?:%s?[\r\n])' % line_comment
    110 
    111     def fix_charclass(result):
    112         """ Fixup string of chars to fit into a regex char class """
    113         pos = result.find('-')
    114         if pos >= 0:
    115             result = r'%s%s-' % (result[:pos], result[pos + 1:])
    116 
    117         def sequentize(string):
    118             """
    119             Notate consecutive characters as sequence
    120 
    121             (1-4 instead of 1234)
    122             """
    123             first, last, result = None, None, []
    124             for char in map(ord, string):
    125                 if last is None:
    126                     first = last = char
    127                 elif last + 1 == char:
    128                     last = char
    129                 else:
    130                     result.append((first, last))
    131                     first = last = char
    132             if last is not None:
    133                 result.append((first, last))
    134             return ''.join(['%s%s%s' % (
    135                 chr(first),
    136                 last > first + 1 and '-' or '',
    137                 last != first and chr(last) or '') for first, last in result])
    138 
    139         return _re.sub(r'([\000-\040\047])',  # for better portability
    140             lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
    141                 .replace('\\', '\\\\')
    142                 .replace('[', '\\[')
    143                 .replace(']', '\\]')))
    144 
    145     def id_literal_(what):
    146         """ Make id_literal like char class """
    147         match = _re.compile(what).match
    148         result = ''.join([chr(c) for c in xrange(127) if not match(chr(c))])
    149         return '[^%s]' % fix_charclass(result)
    150 
    151     def not_id_literal_(keep):
    152         """ Make negated id_literal like char class """
    153         match = _re.compile(id_literal_(keep)).match
    154         result = ''.join([chr(c) for c in xrange(127) if not match(chr(c))])
    155         return r'[%s]' % fix_charclass(result)
    156 
    157     not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    158     preregex1 = r'[(,=:\[!&|?{};\r\n]'
    159     preregex2 = r'%(not_id_literal)sreturn' % locals()
    160 
    161     id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    162     id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    163     id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
    164 
    165     dull = r'[^\047"/\000-\040]'
    166 
    167     space_sub = _re.compile((
    168         r'(%(dull)s+)'
    169         r'|(%(strings)s%(dull)s*)'
    170         r'|(?<=%(preregex1)s)'
    171             r'%(space)s*(?:%(newline)s%(space)s*)*'
    172             r'(%(regex)s%(dull)s*)'
    173         r'|(?<=%(preregex2)s)'
    174             r'%(space)s*(?:%(newline)s%(space)s)*'
    175             r'(%(regex)s%(dull)s*)'
    176         r'|(?<=%(id_literal_close)s)'
    177             r'%(space)s*(?:(%(newline)s)%(space)s*)+'
    178             r'(?=%(id_literal_open)s)'
    179         r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
    180         r'|(?<=\+)(%(space)s)+(?=\+)'
    181         r'|(?<=-)(%(space)s)+(?=-)'
    182         r'|%(space)s+'
    183         r'|(?:%(newline)s%(space)s*)+') % locals()).sub
    184     #print space_sub.__self__.pattern
    185 
    186     def space_subber(match):
    187         """ Substitution callback """
    188         # pylint: disable = C0321, R0911
    189         groups = match.groups()
    190         if groups[0]:
    191             return groups[0]
    192         elif groups[1]:
    193             return groups[1]
    194         elif groups[2]:
    195             return groups[2]
    196         elif groups[3]:
    197             return groups[3]
    198         elif groups[4]:
    199             return '\n'
    200         elif groups[5] or groups[6] or groups[7]:
    201             return ' '
    202         else:
    203             return ''
    204 
    205     def jsmin(script):  # pylint: disable = W0621
    206         r"""
    207         Minify javascript based on `jsmin.c by Douglas Crockford`_\.
    208 
    209         Instead of parsing the stream char by char, it uses a regular
    210         expression approach which minifies the whole script with one big
    211         substitution regex.
    212 
    213         .. _jsmin.c by Douglas Crockford:
    214            http://www.crockford.com/javascript/jsmin.c
    215 
    216         :Parameters:
    217           `script` : ``str``
    218             Script to minify
    219 
    220         :Return: Minified script
    221         :Rtype: ``str``
    222         """
    223         return space_sub(space_subber, '\n%s\n' % script).strip()
    224 
    225     return jsmin
    226 
    227 jsmin = _make_jsmin()
    228 
    229 
    230 def jsmin_for_posers(script):
    231     r"""
    232     Minify javascript based on `jsmin.c by Douglas Crockford`_\.
    233 
    234     Instead of parsing the stream char by char, it uses a regular
    235     expression approach which minifies the whole script with one big
    236     substitution regex.
    237 
    238     .. _jsmin.c by Douglas Crockford:
    239        http://www.crockford.com/javascript/jsmin.c
    240 
    241     :Warning: This function is the digest of a _make_jsmin() call. It just
    242               utilizes the resulting regex. It's just for fun here and may
    243               vanish any time. Use the `jsmin` function instead.
    244 
    245     :Parameters:
    246       `script` : ``str``
    247         Script to minify
    248 
    249     :Return: Minified script
    250     :Rtype: ``str``
    251     """
    252     def subber(match):
    253         """ Substitution callback """
    254         groups = match.groups()
    255         return (
    256             groups[0] or
    257             groups[1] or
    258             groups[2] or
    259             groups[3] or
    260             (groups[4] and '\n') or
    261             (groups[5] and ' ') or
    262             (groups[6] and ' ') or
    263             (groups[7] and ' ') or
    264             '')
    265 
    266     return _re.sub(
    267         r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?'
    268         r'\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|'
    269         r'\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r\n])(?'
    270         r':[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*'
    271         r'(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*'
    272         r'[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
    273         r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\['
    274         r'\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return'
    275         r')(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/'
    276         r'))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:'
    277         r'/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?'
    278         r':(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
    279         r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|'
    280         r'~])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)'
    281         r'*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]'
    282         r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,./'
    283         r':-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\013\01'
    284         r'4\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:'
    285         r'-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*'
    286         r'\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-'
    287         r'\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
    288         r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?:(?://[^'
    289         r'\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^'
    290         r'/*][^*]*\*+)*/))*)+', subber, '\n%s\n' % script).strip()
    291 
    292 
    293 if __name__ == '__main__':
    294     import sys as _sys
    295     _sys.stdout.write(jsmin(_sys.stdin.read()))
    296