Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 # -*- coding: ascii -*-
      3 #
      4 # Copyright 2011 - 2013
      5 # Andr\xe9 Malo or his licensors, as applicable
      6 #
      7 # Licensed under the Apache License, Version 2.0 (the "License");
      8 # you may not use this file except in compliance with the License.
      9 # You may obtain a copy of the License at
     10 #
     11 #     http://www.apache.org/licenses/LICENSE-2.0
     12 #
     13 # Unless required by applicable law or agreed to in writing, software
     14 # distributed under the License is distributed on an "AS IS" BASIS,
     15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16 # See the License for the specific language governing permissions and
     17 # limitations under the License.
     18 r"""
     19 =====================
     20  Javascript Minifier
     21 =====================
     22 
     23 rJSmin is a javascript minifier written in python.
     24 
     25 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\.
     26 
     27 The module is a re-implementation aiming for speed, so it can be used at
     28 runtime (rather than during a preprocessing step). Usually it produces the
     29 same results as the original ``jsmin.c``. It differs in the following ways:
     30 
     31 - there is no error detection: unterminated string, regex and comment
     32   literals are treated as regular javascript code and minified as such.
     33 - Control characters inside string and regex literals are left untouched; they
     34   are not converted to spaces (nor to \n)
     35 - Newline characters are not allowed inside string and regex literals, except
     36   for line continuations in string literals (ECMA-5).
     37 - "return /regex/" is recognized correctly.
     38 - "+ +" and "- -" sequences are not collapsed to '++' or '--'
     39 - Newlines before ! operators are removed more sensibly
     40 - rJSmin does not handle streams, but only complete strings. (However, the
     41   module provides a "streamy" interface).
     42 
     43 Since most parts of the logic are handled by the regex engine it's way
     44 faster than the original python port of ``jsmin.c`` by Baruch Even. The speed
     45 factor varies between about 6 and 55 depending on input and python version
     46 (it gets faster the more compressed the input already is). Compared to the
     47 speed-refactored python port by Dave St.Germain the performance gain is less
     48 dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for
     49 details.
     50 
     51 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
     52 
     53 Both python 2 and python 3 are supported.
     54 
     55 .. _jsmin.c by Douglas Crockford:
     56    http://www.crockford.com/javascript/jsmin.c
     57 """
# Author name. "\xe9" is the latin-1 code point of the accented "e"; it is
# written as an escape so that the source file itself stays pure ASCII (see
# the coding declaration at the top of the file).
__author__ = "Andr\xe9 Malo"
# If the string object offers a ``decode`` method (byte strings do, e.g. the
# native ``str`` on Python 2) it is decoded from latin-1 into a text string.
# Otherwise the fallback lambda ignores its argument and returns the string
# unchanged.
__author__ = getattr(__author__, 'decode', lambda x: __author__)('latin-1')
# Markup language used by the docstrings in this module.
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.7'
# Only ``jsmin`` is exported by ``from <module> import *``.
__all__ = ['jsmin']
     64 
     65 import re as _re
     66 
     67 
     68 def _make_jsmin(python_only=False):
     69     """
     70     Generate JS minifier based on `jsmin.c by Douglas Crockford`_
     71 
     72     .. _jsmin.c by Douglas Crockford:
     73        http://www.crockford.com/javascript/jsmin.c
     74 
     75     :Parameters:
     76       `python_only` : ``bool``
     77         Use only the python variant. If true, the c extension is not even
     78         tried to be loaded.
     79 
     80     :Return: Minifier
     81     :Rtype: ``callable``
     82     """
     83     # pylint: disable = R0912, R0914, W0612
     84     if not python_only:
     85         try:
     86             import _rjsmin
     87         except ImportError:
     88             pass
     89         else:
     90             return _rjsmin.jsmin
     91     try:
     92         xrange
     93     except NameError:
     94         xrange = range # pylint: disable = W0622
     95 
     96     space_chars = r'[\000-\011\013\014\016-\040]'
     97 
     98     line_comment = r'(?://[^\r\n]*)'
     99     space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    100     string1 = \
    101         r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    102     string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    103     strings = r'(?:%s|%s)' % (string1, string2)
    104 
    105     charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    106     nospecial = r'[^/\\\[\r\n]'
    107     regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
    108         nospecial, charclass, nospecial
    109     )
    110     space = r'(?:%s|%s)' % (space_chars, space_comment)
    111     newline = r'(?:%s?[\r\n])' % line_comment
    112 
    113     def fix_charclass(result):
    114         """ Fixup string of chars to fit into a regex char class """
    115         pos = result.find('-')
    116         if pos >= 0:
    117             result = r'%s%s-' % (result[:pos], result[pos + 1:])
    118 
    119         def sequentize(string):
    120             """
    121             Notate consecutive characters as sequence
    122 
    123             (1-4 instead of 1234)
    124             """
    125             first, last, result = None, None, []
    126             for char in map(ord, string):
    127                 if last is None:
    128                     first = last = char
    129                 elif last + 1 == char:
    130                     last = char
    131                 else:
    132                     result.append((first, last))
    133                     first = last = char
    134             if last is not None:
    135                 result.append((first, last))
    136             return ''.join(['%s%s%s' % (
    137                 chr(first),
    138                 last > first + 1 and '-' or '',
    139                 last != first and chr(last) or ''
    140             ) for first, last in result])
    141 
    142         return _re.sub(r'([\000-\040\047])', # for better portability
    143             lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
    144                 .replace('\\', '\\\\')
    145                 .replace('[', '\\[')
    146                 .replace(']', '\\]')
    147             )
    148         )
    149 
    150     def id_literal_(what):
    151         """ Make id_literal like char class """
    152         match = _re.compile(what).match
    153         result = ''.join([
    154             chr(c) for c in xrange(127) if not match(chr(c))
    155         ])
    156         return '[^%s]' % fix_charclass(result)
    157 
    158     def not_id_literal_(keep):
    159         """ Make negated id_literal like char class """
    160         match = _re.compile(id_literal_(keep)).match
    161         result = ''.join([
    162             chr(c) for c in xrange(127) if not match(chr(c))
    163         ])
    164         return r'[%s]' % fix_charclass(result)
    165 
    166     not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    167     preregex1 = r'[(,=:\[!&|?{};\r\n]'
    168     preregex2 = r'%(not_id_literal)sreturn' % locals()
    169 
    170     id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    171     id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    172     id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
    173 
    174     dull = r'[^\047"/\000-\040]'
    175 
    176     space_sub = _re.compile((
    177         r'(%(dull)s+)'
    178         r'|(%(strings)s%(dull)s*)'
    179         r'|(?<=%(preregex1)s)'
    180             r'%(space)s*(?:%(newline)s%(space)s*)*'
    181             r'(%(regex)s%(dull)s*)'
    182         r'|(?<=%(preregex2)s)'
    183             r'%(space)s*(?:%(newline)s%(space)s)*'
    184             r'(%(regex)s%(dull)s*)'
    185         r'|(?<=%(id_literal_close)s)'
    186             r'%(space)s*(?:(%(newline)s)%(space)s*)+'
    187             r'(?=%(id_literal_open)s)'
    188         r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
    189         r'|(?<=\+)(%(space)s)+(?=\+)'
    190         r'|(?<=-)(%(space)s)+(?=-)'
    191         r'|%(space)s+'
    192         r'|(?:%(newline)s%(space)s*)+'
    193     ) % locals()).sub
    194     #print space_sub.__self__.pattern
    195 
    196     def space_subber(match):
    197         """ Substitution callback """
    198         # pylint: disable = C0321, R0911
    199         groups = match.groups()
    200         if groups[0]: return groups[0]
    201         elif groups[1]: return groups[1]
    202         elif groups[2]: return groups[2]
    203         elif groups[3]: return groups[3]
    204         elif groups[4]: return '\n'
    205         elif groups[5] or groups[6] or groups[7]: return ' '
    206         else: return ''
    207 
    208     def jsmin(script): # pylint: disable = W0621
    209         r"""
    210         Minify javascript based on `jsmin.c by Douglas Crockford`_\.
    211 
    212         Instead of parsing the stream char by char, it uses a regular
    213         expression approach which minifies the whole script with one big
    214         substitution regex.
    215 
    216         .. _jsmin.c by Douglas Crockford:
    217            http://www.crockford.com/javascript/jsmin.c
    218 
    219         :Parameters:
    220           `script` : ``str``
    221             Script to minify
    222 
    223         :Return: Minified script
    224         :Rtype: ``str``
    225         """
    226         return space_sub(space_subber, '\n%s\n' % script).strip()
    227 
    228     return jsmin
    229 
# Build the public minifier once at import time. Called without arguments,
# _make_jsmin() first tries the _rjsmin C extension and falls back to the
# pure python implementation if that import fails.
jsmin = _make_jsmin()
    231 
    232 
    233 def jsmin_for_posers(script):
    234     r"""
    235     Minify javascript based on `jsmin.c by Douglas Crockford`_\.
    236 
    237     Instead of parsing the stream char by char, it uses a regular
    238     expression approach which minifies the whole script with one big
    239     substitution regex.
    240 
    241     .. _jsmin.c by Douglas Crockford:
    242        http://www.crockford.com/javascript/jsmin.c
    243 
    244     :Warning: This function is the digest of a _make_jsmin() call. It just
    245               utilizes the resulting regex. It's just for fun here and may
    246               vanish any time. Use the `jsmin` function instead.
    247 
    248     :Parameters:
    249       `script` : ``str``
    250         Script to minify
    251 
    252     :Return: Minified script
    253     :Rtype: ``str``
    254     """
    255     def subber(match):
    256         """ Substitution callback """
    257         groups = match.groups()
    258         return (
    259             groups[0] or
    260             groups[1] or
    261             groups[2] or
    262             groups[3] or
    263             (groups[4] and '\n') or
    264             (groups[5] and ' ') or
    265             (groups[6] and ' ') or
    266             (groups[7] and ' ') or
    267             ''
    268         )
    269 
    270     return _re.sub(
    271         r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?'
    272         r'\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|'
    273         r'\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r\n])(?'
    274         r':[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*'
    275         r'(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*'
    276         r'[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
    277         r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\['
    278         r'\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return'
    279         r')(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/'
    280         r'))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:'
    281         r'/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?'
    282         r':(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
    283         r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|'
    284         r'~])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)'
    285         r'*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]'
    286         r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,./'
    287         r':-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\013\01'
    288         r'4\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:'
    289         r'-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*'
    290         r'\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-'
    291         r'\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
    292         r'\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?:(?://[^'
    293         r'\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^'
    294         r'/*][^*]*\*+)*/))*)+', subber, '\n%s\n' % script
    295     ).strip()
    296 
    297 
    298 if __name__ == '__main__':
    299     import sys as _sys
    300     _sys.stdout.write(jsmin(_sys.stdin.read()))
    301