1 /* 2 * Copyright 2011 - 2015 3 * Andr\xe9 Malo or his licensors, as applicable 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 #include "cext.h" 19 EXT_INIT_FUNC; 20 21 #define RJSMIN_DULL_BIT (1 << 0) 22 #define RJSMIN_PRE_REGEX_BIT (1 << 1) 23 #define RJSMIN_REGEX_DULL_BIT (1 << 2) 24 #define RJSMIN_REGEX_CC_DULL_BIT (1 << 3) 25 #define RJSMIN_ID_LIT_BIT (1 << 4) 26 #define RJSMIN_ID_LIT_O_BIT (1 << 5) 27 #define RJSMIN_ID_LIT_C_BIT (1 << 6) 28 #define RJSMIN_STRING_DULL_BIT (1 << 7) 29 #define RJSMIN_SPACE_BIT (1 << 8) 30 #define RJSMIN_POST_REGEX_OFF_BIT (1 << 9) 31 32 #ifdef EXT3 33 typedef Py_UNICODE rchar; 34 #else 35 typedef unsigned char rchar; 36 #endif 37 #define U(c) ((rchar)(c)) 38 39 #define RJSMIN_IS_DULL(c) ((U(c) > 127) || \ 40 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_DULL_BIT)) 41 42 #define RJSMIN_IS_REGEX_DULL(c) ((U(c) > 127) || \ 43 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_DULL_BIT)) 44 45 #define RJSMIN_IS_REGEX_CC_DULL(c) ((U(c) > 127) || \ 46 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_CC_DULL_BIT)) 47 48 #define RJSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \ 49 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_STRING_DULL_BIT)) 50 51 #define RJSMIN_IS_ID_LITERAL(c) ((U(c) > 127) || \ 52 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_BIT)) 53 54 #define RJSMIN_IS_ID_LITERAL_OPEN(c) ((U(c) > 127) || \ 55 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_O_BIT)) 56 57 #define RJSMIN_IS_ID_LITERAL_CLOSE(c) ((U(c) > 127) || \ 58 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_C_BIT)) 59 60 #define RJSMIN_IS_POST_REGEX_OFF(c) ((U(c) > 127) || \ 61 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_POST_REGEX_OFF_BIT)) 62 63 #define RJSMIN_IS_SPACE(c) ((U(c) <= 127) && \ 64 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_SPACE_BIT)) 65 66 #define RJSMIN_IS_PRE_REGEX_1(c) ((U(c) <= 127) && \ 67 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_PRE_REGEX_BIT)) 68 69 70 static const unsigned short rjsmin_charmask[128] = { 71 396, 396, 396, 396, 396, 396, 396, 396, 72 396, 396, 2, 396, 396, 2, 396, 396, 73 396, 396, 396, 396, 396, 396, 396, 396, 74 396, 396, 396, 396, 396, 396, 396, 396, 75 396, 687, 588, 653, 765, 653, 143, 588, 76 687, 205, 653, 237, 143, 237, 141, 648, 77 765, 765, 765, 765, 765, 765, 765, 765, 78 765, 765, 143, 143, 653, 143, 653, 143, 79 653, 765, 765, 765, 765, 765, 765, 765, 80 765, 765, 765, 765, 765, 765, 765, 765, 81 765, 765, 765, 765, 765, 765, 765, 765, 82 765, 765, 765, 683, 513, 197, 653, 765, 83 653, 765, 765, 765, 765, 765, 765, 765, 84 765, 765, 765, 765, 765, 765, 765, 765, 85 765, 765, 765, 765, 765, 765, 765, 765, 86 765, 765, 765, 687, 143, 207, 653, 765 87 }; 88 89 static Py_ssize_t 90 rjsmin(const rchar *source, rchar *target, Py_ssize_t length, 91 int keep_bang_comments) 92 { 93 const rchar *reset, *pcreset = NULL, *pctoken = NULL, *xtarget, 94 *sentinel = source + length; 95 rchar *tstart = target; 96 int post_regex = 0; 97 rchar c, quote, spaced = U(' '); 98 99 while (source < sentinel) { 100 c = *source++; 101 if (RJSMIN_IS_DULL(c)) { 102 if (post_regex) post_regex = 0; 103 if (pctoken) pctoken = NULL; 104 if (spaced == U('\n')) spaced = U(' '); 105 106 *target++ = c; 107 continue; 108 } 109 switch (c) { 110 111 /* String */ 112 case U('\''): case U('"'): 113 if (post_regex) post_regex = 0; 114 if (pctoken) pctoken = NULL; 115 if (spaced == U('\n')) spaced = U(' '); 116 117 reset = source; 118 *target++ = quote = c; 119 while (source < sentinel) { 120 c = *source++; 121 *target++ = c; 122 if (RJSMIN_IS_STRING_DULL(c)) 123 continue; 124 switch (c) { 125 case U('\''): case U('"'): 126 if (c == quote) 127 goto cont; 128 continue; 129 case U('\\'): 130 if (source < sentinel) { 131 c = *source++; 132 *target++ = c; 133 if (c == U('\r') && source < sentinel 134 && *source == U('\n')) 135 *target++ = *source++; 136 } 137 continue; 138 } 139 break; 140 } 141 target -= source - reset; 142 source = reset; 143 continue; 144 145 /* Comment or Regex or something else entirely */ 146 case U('/'): 147 if (!(source < sentinel)) { 148 if (post_regex) post_regex = 0; 149 if (pctoken) pctoken = NULL; 150 if (spaced == U('\n')) spaced = U(' '); 151 152 *target++ = c; 153 } 154 else { 155 switch (*source) { 156 /* Comment */ 157 case U('*'): case U('/'): 158 goto skip_or_copy_ws; 159 160 default: 161 xtarget = NULL; 162 if ( target == tstart 163 || RJSMIN_IS_PRE_REGEX_1(*((pctoken ? pctoken : target) 164 - 1)) 165 || ( 166 (xtarget = pctoken ? pctoken : target) 167 && (xtarget - tstart >= 6) 168 && *(xtarget - 1) == U('n') 169 && *(xtarget - 2) == U('r') 170 && *(xtarget - 3) == U('u') 171 && *(xtarget - 4) == U('t') 172 && *(xtarget - 5) == U('e') 173 && *(xtarget - 6) == U('r') 174 && ( 175 xtarget - tstart == 6 176 || !RJSMIN_IS_ID_LITERAL(*(xtarget - 7)) 177 ) 178 )) { 179 180 /* Regex */ 181 if (post_regex) post_regex = 0; 182 if (pctoken) pctoken = NULL; 183 184 reset = source; 185 if (spaced == U('\n')) { 186 spaced = U(' '); 187 if (xtarget) 188 *target++ = U('\n'); 189 } 190 191 *target++ = U('/'); 192 while (source < sentinel) { 193 c = *source++; 194 *target++ = c; 195 if (RJSMIN_IS_REGEX_DULL(c)) 196 continue; 197 switch (c) { 198 case U('/'): 199 post_regex = 1; 200 goto cont; 201 case U('\\'): 202 if (source < sentinel) { 203 c = *source++; 204 *target++ = c; 205 if (c == U('\r') || c == U('\n')) 206 break; 207 } 208 continue; 209 case U('['): 210 while (source < sentinel) { 211 c = *source++; 212 *target++ = c; 213 if (RJSMIN_IS_REGEX_CC_DULL(c)) 214 continue; 215 switch (c) { 216 case U('\\'): 217 if (source < sentinel) { 218 c = *source++; 219 *target++ = c; 220 if (c == U('\r') || c == U('\n')) 221 break; 222 } 223 continue; 224 case U(']'): 225 goto cont_regex; 226 } 227 } 228 break; 229 } 230 break; 231 cont_regex: 232 continue; 233 } 234 target -= source - reset; 235 source = reset; 236 } 237 else { 238 /* Just a slash */ 239 if (post_regex) post_regex = 0; 240 if (pctoken) pctoken = NULL; 241 if (spaced == U('\n')) spaced = U(' '); 242 243 *target++ = c; 244 } 245 continue; 246 } 247 } 248 continue; 249 250 /* Whitespace */ 251 default: 252 skip_or_copy_ws: 253 quote = U(' '); 254 --source; 255 while (source < sentinel) { 256 c = *source++; 257 if (RJSMIN_IS_SPACE(c)) 258 continue; 259 switch (c) { 260 case U('\r'): case U('\n'): 261 quote = U('\n'); 262 continue; 263 case U('/'): 264 if (source < sentinel) { 265 switch (*source) { 266 case U('*'): 267 reset = source++; 268 /* copy bang comment, if requested */ 269 if ( keep_bang_comments && source < sentinel 270 && *source == U('!')) { 271 if (!pctoken) { 272 pctoken = target; 273 pcreset = reset; 274 } 275 276 *target++ = U('/'); 277 *target++ = U('*'); 278 *target++ = *source++; 279 while (source < sentinel) { 280 c = *source++; 281 *target++ = c; 282 if (c == U('*') && source < sentinel 283 && *source == U('/')) { 284 *target++ = *source++; 285 reset = NULL; 286 break; 287 } 288 } 289 if (!reset) 290 continue; 291 292 target -= source - reset; 293 source = reset; 294 if (pcreset == reset) { 295 pctoken = NULL; 296 pcreset = NULL; 297 } 298 299 } 300 /* strip regular comment */ 301 else { 302 while (source < sentinel) { 303 c = *source++; 304 if (c == U('*') && source < sentinel 305 && *source == U('/')) { 306 ++source; 307 reset = NULL; 308 break; 309 } 310 } 311 if (!reset) 312 continue; 313 source = reset; 314 *target++ = U('/'); 315 } 316 goto cont; 317 case U('/'): 318 ++source; 319 while (source < sentinel) { 320 c = *source++; 321 switch (c) { 322 case U('\n'): 323 break; 324 case U('\r'): 325 if (source < sentinel 326 && *source == U('\n')) 327 ++source; 328 break; 329 default: 330 continue; 331 } 332 break; 333 } 334 quote = U('\n'); 335 continue; 336 } 337 } 338 } 339 --source; 340 break; 341 } 342 343 if ((tstart < (pctoken ? pctoken : target) && source < sentinel) 344 && ((quote == U('\n') 345 && ((RJSMIN_IS_ID_LITERAL_CLOSE(*((pctoken ? 346 pctoken : target) - 1)) 347 && RJSMIN_IS_ID_LITERAL_OPEN(*source)) 348 || (post_regex 349 && RJSMIN_IS_POST_REGEX_OFF(*source) 350 && !(post_regex = 0)))) 351 || 352 (quote == U(' ') && !pctoken 353 && ((RJSMIN_IS_ID_LITERAL(*(target - 1)) 354 && RJSMIN_IS_ID_LITERAL(*source)) 355 || (source < sentinel 356 && ((*(target - 1) == U('+') 357 && *source == U('+')) 358 || (*(target - 1) == U('-') 359 && *source == U('-')))))))) { 360 *target++ = quote; 361 } 362 363 pcreset = NULL; 364 spaced = quote; 365 } 366 cont: 367 continue; 368 } 369 return (Py_ssize_t)(target - tstart); 370 } 371 372 373 PyDoc_STRVAR(rjsmin_jsmin__doc__, 374 "jsmin(script, keep_bang_comments=False)\n\ 375 \n\ 376 Minify javascript based on `jsmin.c by Douglas Crockford`_\\.\n\ 377 \n\ 378 Instead of parsing the stream char by char, it uses a regular\n\ 379 expression approach which minifies the whole script with one big\n\ 380 substitution regex.\n\ 381 \n\ 382 .. _jsmin.c by Douglas Crockford:\n\ 383 http://www.crockford.com/javascript/jsmin.c\n\ 384 \n\ 385 :Note: This is a hand crafted C implementation built on the regex\n\ 386 semantics.\n\ 387 \n\ 388 :Parameters:\n\ 389 `script` : ``str``\n\ 390 Script to minify\n\ 391 \n\ 392 `keep_bang_comments` : ``bool``\n\ 393 Keep comments starting with an exclamation mark? (``/*!...*/``)\n\ 394 \n\ 395 :Return: Minified script\n\ 396 :Rtype: ``str``"); 397 398 static PyObject * 399 rjsmin_jsmin(PyObject *self, PyObject *args, PyObject *kwds) 400 { 401 PyObject *script, *keep_bang_comments_ = NULL, *result; 402 static char *kwlist[] = {"script", "keep_bang_comments", NULL}; 403 Py_ssize_t slength, length; 404 int keep_bang_comments; 405 #ifdef EXT2 406 int uni; 407 #define UOBJ "O" 408 #endif 409 #ifdef EXT3 410 #define UOBJ "U" 411 #endif 412 413 if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist, 414 &script, &keep_bang_comments_)) 415 return NULL; 416 417 if (!keep_bang_comments_) 418 keep_bang_comments = 0; 419 else { 420 keep_bang_comments = PyObject_IsTrue(keep_bang_comments_); 421 if (keep_bang_comments == -1) 422 return NULL; 423 } 424 425 #ifdef EXT2 426 if (PyUnicode_Check(script)) { 427 if (!(script = PyUnicode_AsUTF8String(script))) 428 return NULL; 429 uni = 1; 430 } 431 else { 432 if (!(script = PyObject_Str(script))) 433 return NULL; 434 uni = 0; 435 } 436 #endif 437 438 #ifdef EXT3 439 Py_INCREF(script); 440 #define PyString_GET_SIZE PyUnicode_GET_SIZE 441 #define PyString_AS_STRING PyUnicode_AS_UNICODE 442 #define _PyString_Resize PyUnicode_Resize 443 #define PyString_FromStringAndSize PyUnicode_FromUnicode 444 #endif 445 446 slength = PyString_GET_SIZE(script); 447 if (!(result = PyString_FromStringAndSize(NULL, slength))) { 448 Py_DECREF(script); 449 return NULL; 450 } 451 Py_BEGIN_ALLOW_THREADS 452 length = rjsmin((rchar *)PyString_AS_STRING(script), 453 (rchar *)PyString_AS_STRING(result), 454 slength, keep_bang_comments); 455 Py_END_ALLOW_THREADS 456 457 Py_DECREF(script); 458 if (length < 0) { 459 Py_DECREF(result); 460 return NULL; 461 } 462 if (length != slength && _PyString_Resize(&result, length) == -1) 463 return NULL; 464 465 #ifdef EXT2 466 if (uni) { 467 script = PyUnicode_DecodeUTF8(PyString_AS_STRING(result), 468 PyString_GET_SIZE(result), "strict"); 469 Py_DECREF(result); 470 if (!script) 471 return NULL; 472 result = script; 473 } 474 #endif 475 return result; 476 } 477 478 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */ 479 480 EXT_METHODS = { 481 {"jsmin", 482 (PyCFunction)rjsmin_jsmin, METH_VARARGS | METH_KEYWORDS, 483 rjsmin_jsmin__doc__}, 484 485 {NULL} /* Sentinel */ 486 }; 487 488 PyDoc_STRVAR(EXT_DOCS_VAR, 489 "C implementation of rjsmin\n\ 490 ==========================\n\ 491 \n\ 492 C implementation of rjsmin."); 493 494 495 EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR); 496 497 EXT_INIT_FUNC { 498 PyObject *m; 499 500 /* Create the module and populate stuff */ 501 if (!(m = EXT_CREATE(&EXT_DEFINE_VAR))) 502 EXT_INIT_ERROR(NULL); 503 504 EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1"); 505 EXT_ADD_STRING(m, "__docformat__", "restructuredtext en"); 506 507 EXT_INIT_RETURN(m); 508 } 509 510 /* ------------------------- END MODULE DEFINITION ------------------------- */ 511