1 /* 2 * Copyright 2011 - 2014 3 * Andr\xe9 Malo or his licensors, as applicable 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 #include "cext.h" 19 EXT_INIT_FUNC; 20 21 #ifdef EXT3 22 typedef Py_UNICODE rchar; 23 #else 24 typedef unsigned char rchar; 25 #endif 26 #define U(c) ((rchar)(c)) 27 28 typedef struct { 29 const rchar *start; 30 const rchar *sentinel; 31 const rchar *tsentinel; 32 Py_ssize_t at_group; 33 int in_macie5; 34 int in_rule; 35 int keep_bang_comments; 36 } rcssmin_ctx_t; 37 38 typedef enum { 39 NEED_SPACE_MAYBE = 0, 40 NEED_SPACE_NEVER 41 } need_space_flag; 42 43 44 #define RCSSMIN_DULL_BIT (1 << 0) 45 #define RCSSMIN_HEX_BIT (1 << 1) 46 #define RCSSMIN_ESC_BIT (1 << 2) 47 #define RCSSMIN_SPACE_BIT (1 << 3) 48 #define RCSSMIN_STRING_DULL_BIT (1 << 4) 49 #define RCSSMIN_NMCHAR_BIT (1 << 5) 50 #define RCSSMIN_URI_DULL_BIT (1 << 6) 51 #define RCSSMIN_PRE_CHAR_BIT (1 << 7) 52 #define RCSSMIN_POST_CHAR_BIT (1 << 8) 53 54 static const unsigned short rcssmin_charmask[128] = { 55 21, 21, 21, 21, 21, 21, 21, 21, 56 21, 28, 8, 21, 8, 8, 21, 21, 57 21, 21, 21, 21, 21, 21, 21, 21, 58 21, 21, 21, 21, 21, 21, 21, 21, 59 28, 469, 4, 85, 85, 85, 85, 4, 60 149, 277, 85, 469, 469, 117, 85, 84, 61 115, 115, 115, 115, 115, 115, 115, 115, 62 115, 115, 468, 340, 85, 469, 468, 85, 63 84, 115, 115, 115, 115, 115, 115, 117, 64 117, 117, 117, 117, 117, 117, 117, 117, 65 117, 117, 117, 117, 117, 117, 117, 117, 66 117, 117, 117, 213, 4, 341, 85, 117, 67 85, 115, 115, 115, 115, 115, 115, 117, 68 117, 117, 117, 117, 117, 117, 117, 117, 69 117, 117, 117, 117, 117, 116, 117, 117, 70 117, 117, 117, 468, 85, 468, 85, 21 71 }; 72 73 #define RCSSMIN_IS_DULL(c) ((U(c) > 127) || \ 74 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_DULL_BIT)) 75 76 #define RCSSMIN_IS_HEX(c) ((U(c) <= 127) && \ 77 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_HEX_BIT)) 78 79 #define RCSSMIN_IS_ESC(c) ((U(c) > 127) || \ 80 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_ESC_BIT)) 81 82 #define RCSSMIN_IS_SPACE(c) ((U(c) <= 127) && \ 83 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_SPACE_BIT)) 84 85 #define RCSSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \ 86 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_STRING_DULL_BIT)) 87 88 #define RCSSMIN_IS_NMCHAR(c) ((U(c) > 127) || \ 89 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_NMCHAR_BIT)) 90 91 #define RCSSMIN_IS_URI_DULL(c) ((U(c) > 127) || \ 92 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_URI_DULL_BIT)) 93 94 #define RCSSMIN_IS_PRE_CHAR(c) ((U(c) <= 127) && \ 95 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_PRE_CHAR_BIT)) 96 97 #define RCSSMIN_IS_POST_CHAR(c) ((U(c) <= 127) && \ 98 (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_POST_CHAR_BIT)) 99 100 101 static const rchar pattern_url[] = { 102 /*U('u'),*/ U('r'), U('l'), U('(') 103 }; 104 105 static const rchar pattern_ie7[] = { 106 /*U('>'),*/ U('/'), U('*'), U('*'), U('/') 107 }; 108 109 static const rchar pattern_media[] = { 110 U('m'), U('e'), U('d'), U('i'), U('a'), 111 U('M'), U('E'), U('D'), U('I'), U('A') 112 }; 113 114 static const rchar pattern_document[] = { 115 U('d'), U('o'), U('c'), U('u'), U('m'), U('e'), U('n'), U('t'), 116 U('D'), U('O'), U('C'), U('U'), U('M'), U('E'), U('N'), U('T') 117 }; 118 119 static const rchar pattern_supports[] = { 120 U('s'), U('u'), U('p'), U('p'), U('o'), U('r'), U('t'), U('s'), 121 U('S'), U('U'), U('P'), U('P'), U('O'), U('R'), U('T'), U('S') 122 }; 123 124 static const rchar pattern_keyframes[] = { 125 U('k'), U('e'), U('y'), U('f'), U('r'), U('a'), U('m'), U('e'), U('s'), 126 U('K'), U('E'), U('Y'), U('F'), U('R'), U('A'), U('M'), U('E'), U('S') 127 }; 128 129 static const rchar pattern_vendor_o[] = { 130 U('-'), U('o'), U('-'), 131 U('-'), U('O'), U('-') 132 }; 133 134 static const rchar pattern_vendor_moz[] = { 135 U('-'), U('m'), U('o'), U('z'), U('-'), 136 U('-'), U('M'), U('O'), U('Z'), U('-') 137 }; 138 139 static const rchar pattern_vendor_webkit[] = { 140 U('-'), U('w'), U('e'), U('b'), U('k'), U('i'), U('t'), U('-'), 141 U('-'), U('W'), U('E'), U('B'), U('K'), U('I'), U('T'), U('-') 142 }; 143 144 static const rchar pattern_vendor_ms[] = { 145 U('-'), U('m'), U('s'), U('-'), 146 U('-'), U('M'), U('S'), U('-') 147 }; 148 149 static const rchar pattern_first[] = { 150 U('f'), U('i'), U('r'), U('s'), U('t'), U('-'), U('l'), 151 U('F'), U('I'), U('R'), U('S'), U('T'), U('-'), U('L') 152 }; 153 154 static const rchar pattern_line[] = { 155 U('i'), U('n'), U('e'), 156 U('I'), U('N'), U('E'), 157 }; 158 159 static const rchar pattern_letter[] = { 160 U('e'), U('t'), U('t'), U('e'), U('r'), 161 U('E'), U('T'), U('T'), U('E'), U('R') 162 }; 163 164 static const rchar pattern_macie5_init[] = { 165 U('/'), U('*'), U('\\'), U('*'), U('/') 166 }; 167 168 static const rchar pattern_macie5_exit[] = { 169 U('/'), U('*'), U('*'), U('/') 170 }; 171 172 /* 173 * Match a pattern (and copy immediately to target) 174 */ 175 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) 176 #pragma GCC diagnostic push 177 #pragma GCC diagnostic ignored "-Wstrict-overflow" 178 #endif 179 static int 180 copy_match(const rchar *pattern, const rchar *psentinel, 181 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 182 { 183 const rchar *source = *source_; 184 rchar *target = *target_; 185 rchar c; 186 187 while (pattern < psentinel 188 && source < ctx->sentinel && target < ctx->tsentinel 189 && ((c = *source++) == *pattern++)) 190 *target++ = c; 191 192 *source_ = source; 193 *target_ = target; 194 195 return (pattern == psentinel); 196 } 197 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) 198 #pragma GCC diagnostic pop 199 #endif 200 201 #define MATCH(PAT, source, target, ctx) ( \ 202 copy_match(pattern_##PAT, \ 203 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar), \ 204 source, target, ctx) \ 205 ) 206 207 208 /* 209 * Match a pattern (and copy immediately to target) - CI version 210 */ 211 static int 212 copy_imatch(const rchar *pattern, const rchar *psentinel, 213 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 214 { 215 const rchar *source = *source_, *pstart = pattern; 216 rchar *target = *target_; 217 rchar c; 218 219 while (pattern < psentinel 220 && source < ctx->sentinel && target < ctx->tsentinel 221 && ((c = *source++) == *pattern 222 || c == pstart[(pattern - pstart) + (psentinel - pstart)])) { 223 ++pattern; 224 *target++ = c; 225 } 226 227 *source_ = source; 228 *target_ = target; 229 230 return (pattern == psentinel); 231 } 232 233 #define IMATCH(PAT, source, target, ctx) ( \ 234 copy_imatch(pattern_##PAT, \ 235 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar) / 2, \ 236 source, target, ctx) \ 237 ) 238 239 240 /* 241 * Copy characters 242 */ 243 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) 244 #pragma GCC diagnostic push 245 #pragma GCC diagnostic ignored "-Wstrict-overflow" 246 #endif 247 static int 248 copy(const rchar *source, const rchar *sentinel, rchar **target_, 249 rcssmin_ctx_t *ctx) 250 { 251 rchar *target = *target_; 252 253 while (source < sentinel && target < ctx->tsentinel) 254 *target++ = *source++; 255 256 *target_ = target; 257 258 return (source == sentinel); 259 } 260 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) 261 #pragma GCC diagnostic pop 262 #endif 263 264 #define COPY_PAT(PAT, target, ctx) ( \ 265 copy(pattern_##PAT, \ 266 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar), \ 267 target, ctx) \ 268 ) 269 270 271 /* 272 * The ABORT macros work with known local variables! 273 */ 274 #define ABORT_(RET) do { \ 275 if (source < ctx->sentinel && !(target < ctx->tsentinel)) { \ 276 *source_ = source; \ 277 *target_ = target; \ 278 } \ 279 return RET; \ 280 } while(0) 281 282 283 #define CRAPPY_C90_COMPATIBLE_EMPTY 284 #define ABORT ABORT_(CRAPPY_C90_COMPATIBLE_EMPTY) 285 #define RABORT(RET) ABORT_((RET)) 286 287 288 /* 289 * Copy escape 290 */ 291 static void 292 copy_escape(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 293 { 294 const rchar *source = *source_, *hsentinel; 295 rchar *target = *target_; 296 rchar c; 297 298 *target++ = U('\\'); 299 *target_ = target; 300 301 if (source < ctx->sentinel && target < ctx->tsentinel) { 302 c = *source++; 303 if (RCSSMIN_IS_ESC(c)) { 304 *target++ = c; 305 } 306 else if (RCSSMIN_IS_HEX(c)) { 307 *target++ = c; 308 309 /* 6 hex chars max, one we got already */ 310 if (ctx->sentinel - source > 5) 311 hsentinel = source + 5; 312 else 313 hsentinel = ctx->sentinel; 314 315 while (source < hsentinel && target < ctx->tsentinel 316 && (c = *source, RCSSMIN_IS_HEX(c))) { 317 ++source; 318 *target++ = c; 319 } 320 321 /* One optional space after */ 322 if (source < ctx->sentinel && target < ctx->tsentinel) { 323 if (source == hsentinel) 324 c = *source; 325 if (RCSSMIN_IS_SPACE(c)) { 326 ++source; 327 *target++ = U(' '); 328 if (c == U('\r') && source < ctx->sentinel 329 && *source == U('\n')) 330 ++source; 331 } 332 } 333 } 334 } 335 336 *target_ = target; 337 *source_ = source; 338 } 339 340 341 /* 342 * Copy string 343 */ 344 static void 345 copy_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 346 { 347 const rchar *source = *source_; 348 rchar *target = *target_; 349 rchar c, quote = source[-1]; 350 351 *target++ = quote; 352 *target_ = target; 353 354 while (source < ctx->sentinel && target < ctx->tsentinel) { 355 c = *target++ = *source++; 356 if (RCSSMIN_IS_STRING_DULL(c)) 357 continue; 358 359 switch (c) { 360 case U('\''): case U('"'): 361 if (c == quote) { 362 *target_ = target; 363 *source_ = source; 364 return; 365 } 366 continue; 367 368 case U('\\'): 369 if (source < ctx->sentinel && target < ctx->tsentinel) { 370 c = *source++; 371 switch (c) { 372 case U('\r'): 373 if (source < ctx->sentinel && *source == U('\n')) 374 ++source; 375 /* fall through */ 376 377 case U('\n'): case U('\f'): 378 --target; 379 break; 380 381 default: 382 *target++ = c; 383 } 384 } 385 continue; 386 } 387 break; /* forbidden characters */ 388 } 389 390 ABORT; 391 } 392 393 394 /* 395 * Copy URI string 396 */ 397 static int 398 copy_uri_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 399 { 400 const rchar *source = *source_; 401 rchar *target = *target_; 402 rchar c, quote = source[-1]; 403 404 *target++ = quote; 405 *target_ = target; 406 407 while (source < ctx->sentinel && target < ctx->tsentinel) { 408 c = *source++; 409 if (RCSSMIN_IS_SPACE(c)) 410 continue; 411 *target++ = c; 412 if (RCSSMIN_IS_STRING_DULL(c)) 413 continue; 414 415 switch (c) { 416 case U('\''): case U('"'): 417 if (c == quote) { 418 *target_ = target; 419 *source_ = source; 420 return 0; 421 } 422 continue; 423 424 case U('\\'): 425 if (source < ctx->sentinel && target < ctx->tsentinel) { 426 c = *source; 427 switch (c) { 428 case U('\r'): 429 if ((source + 1) < ctx->sentinel && source[1] == U('\n')) 430 ++source; 431 /* fall through */ 432 433 case U('\n'): case U('\f'): 434 --target; 435 ++source; 436 break; 437 438 default: 439 --target; 440 copy_escape(&source, &target, ctx); 441 } 442 } 443 continue; 444 } 445 446 break; /* forbidden characters */ 447 } 448 449 RABORT(-1); 450 } 451 452 453 /* 454 * Copy URI (unquoted) 455 */ 456 static int 457 copy_uri_unquoted(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 458 { 459 const rchar *source = *source_; 460 rchar *target = *target_; 461 rchar c; 462 463 *target++ = source[-1]; 464 *target_ = target; 465 466 while (source < ctx->sentinel && target < ctx->tsentinel) { 467 c = *source++; 468 if (RCSSMIN_IS_SPACE(c)) 469 continue; 470 *target++ = c; 471 if (RCSSMIN_IS_URI_DULL(c)) 472 continue; 473 474 switch (c) { 475 476 case U(')'): 477 *target_ = target - 1; 478 *source_ = source - 1; 479 return 0; 480 481 case U('\\'): 482 if (source < ctx->sentinel && target < ctx->tsentinel) { 483 c = *source; 484 switch (c) { 485 case U('\r'): 486 if ((source + 1) < ctx->sentinel && source[1] == U('\n')) 487 ++source; 488 /* fall through */ 489 490 case U('\n'): case U('\f'): 491 --target; 492 ++source; 493 break; 494 495 default: 496 --target; 497 copy_escape(&source, &target, ctx); 498 } 499 } 500 continue; 501 } 502 503 break; /* forbidden characters */ 504 } 505 506 RABORT(-1); 507 } 508 509 510 /* 511 * Copy url 512 */ 513 static void 514 copy_url(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 515 { 516 const rchar *source = *source_; 517 rchar *target = *target_; 518 rchar c; 519 520 *target++ = U('u'); 521 *target_ = target; 522 523 /* Must not be inside an identifier */ 524 if ((source != ctx->start + 1) && RCSSMIN_IS_NMCHAR(source[-2])) 525 return; 526 527 if (!MATCH(url, &source, &target, ctx) 528 || !(source < ctx->sentinel && target < ctx->tsentinel)) 529 ABORT; 530 531 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source)) 532 ++source; 533 534 if (!(source < ctx->sentinel)) 535 ABORT; 536 537 c = *source++; 538 switch (c) { 539 case U('"'): case U('\''): 540 if (copy_uri_string(&source, &target, ctx) == -1) 541 ABORT; 542 543 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source)) 544 ++source; 545 break; 546 547 default: 548 if (copy_uri_unquoted(&source, &target, ctx) == -1) 549 ABORT; 550 } 551 552 if (!(source < ctx->sentinel && target < ctx->tsentinel)) 553 ABORT; 554 555 if ((*target++ = *source++) != U(')')) 556 ABORT; 557 558 *target_ = target; 559 *source_ = source; 560 } 561 562 563 /* 564 * Copy @-group 565 */ 566 static void 567 copy_at_group(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 568 { 569 const rchar *source = *source_; 570 rchar *target = *target_; 571 572 *target++ = U('@'); 573 *target_ = target; 574 575 #define REMATCH(what) ( \ 576 source = *source_, \ 577 target = *target_, \ 578 IMATCH(what, &source, &target, ctx) \ 579 ) 580 #define CMATCH(what) IMATCH(what, &source, &target, ctx) 581 582 if (( !CMATCH(media) 583 && !REMATCH(supports) 584 && !REMATCH(document) 585 && !REMATCH(keyframes) 586 && !(REMATCH(vendor_webkit) && CMATCH(keyframes)) 587 && !(REMATCH(vendor_moz) && CMATCH(keyframes)) 588 && !(REMATCH(vendor_o) && CMATCH(keyframes)) 589 && !(REMATCH(vendor_ms) && CMATCH(keyframes))) 590 || !(source < ctx->sentinel && target < ctx->tsentinel) 591 || RCSSMIN_IS_NMCHAR(*source)) 592 ABORT; 593 594 #undef CMATCH 595 #undef REMATCH 596 597 ++ctx->at_group; 598 599 *target_ = target; 600 *source_ = source; 601 } 602 603 604 /* 605 * Skip space 606 */ 607 static const rchar * 608 skip_space(const rchar *source, rcssmin_ctx_t *ctx) 609 { 610 const rchar *begin = source; 611 int res; 612 rchar c; 613 614 while (source < ctx->sentinel) { 615 c = *source; 616 if (RCSSMIN_IS_SPACE(c)) { 617 ++source; 618 continue; 619 } 620 else if (c == U('/')) { 621 ++source; 622 if (!(source < ctx->sentinel && *source == U('*'))) { 623 --source; 624 break; 625 } 626 ++source; 627 res = 0; 628 while (source < ctx->sentinel) { 629 c = *source++; 630 if (c != U('*')) 631 continue; 632 if (!(source < ctx->sentinel)) 633 return begin; 634 if (*source != U('/')) 635 continue; 636 637 /* Comment complete */ 638 ++source; 639 res = 1; 640 break; 641 } 642 if (!res) 643 return begin; 644 645 continue; 646 } 647 648 break; 649 } 650 651 return source; 652 } 653 654 655 /* 656 * Copy space 657 */ 658 static void 659 copy_space(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx, 660 need_space_flag need_space) 661 { 662 const rchar *source = *source_, *end, *comment; 663 rchar *target = *target_; 664 int res; 665 rchar c; 666 667 --source; 668 if (need_space == NEED_SPACE_MAYBE 669 && source > ctx->start 670 && !RCSSMIN_IS_PRE_CHAR(source[-1]) 671 && (end = skip_space(source, ctx)) < ctx->sentinel 672 && (!RCSSMIN_IS_POST_CHAR(*end) 673 || (*end == U(':') && !ctx->in_rule && !ctx->at_group))) { 674 675 if (!(target < ctx->tsentinel)) 676 ABORT; 677 *target++ = U(' '); 678 } 679 680 while (source < ctx->sentinel) { 681 switch (c = *source) { 682 683 /* comment */ 684 case U('/'): 685 comment = source++; 686 if (!((source < ctx->sentinel && *source == U('*')))) { 687 --source; 688 break; 689 } 690 ++source; 691 res = 0; 692 while (source < ctx->sentinel) { 693 c = *source++; 694 if (c != U('*')) 695 continue; 696 if (!(source < ctx->sentinel)) 697 ABORT; 698 if (*source != U('/')) 699 continue; 700 701 /* Comment complete */ 702 ++source; 703 res = 1; 704 705 if (ctx->keep_bang_comments && comment[2] == U('!')) { 706 ctx->in_macie5 = (source[-3] == U('\\')); 707 if (!copy(comment, source, &target, ctx)) 708 ABORT; 709 } 710 else if (source[-3] == U('\\')) { 711 if (!ctx->in_macie5) { 712 if (!COPY_PAT(macie5_init, &target, ctx)) 713 ABORT; 714 } 715 ctx->in_macie5 = 1; 716 } 717 else if (ctx->in_macie5) { 718 if (!COPY_PAT(macie5_exit, &target, ctx)) 719 ABORT; 720 ctx->in_macie5 = 0; 721 } 722 /* else don't copy anything */ 723 break; 724 } 725 if (!res) 726 ABORT; 727 continue; 728 729 /* space */ 730 case U(' '): case U('\t'): case U('\r'): case U('\n'): case U('\f'): 731 ++source; 732 continue; 733 } 734 735 break; 736 } 737 738 *source_ = source; 739 *target_ = target; 740 } 741 742 743 /* 744 * Copy space if comment 745 */ 746 static int 747 copy_space_comment(const rchar **source_, rchar **target_, 748 rcssmin_ctx_t *ctx, need_space_flag need_space) 749 { 750 const rchar *source = *source_; 751 rchar *target = *target_; 752 753 if (source < ctx->sentinel && *source == U('*')) { 754 copy_space(source_, target_, ctx, need_space); 755 if (*source_ > source) 756 return 0; 757 } 758 if (!(target < ctx->tsentinel)) 759 RABORT(-1); 760 761 *target++ = source[-1]; 762 763 /* *source_ = source; <-- unchanged */ 764 *target_ = target; 765 766 return -1; 767 } 768 769 770 /* 771 * Copy space if exists 772 */ 773 static int 774 copy_space_optional(const rchar **source_, rchar **target_, 775 rcssmin_ctx_t *ctx) 776 { 777 const rchar *source = *source_; 778 779 if (!(source < ctx->sentinel)) 780 return -1; 781 782 if (*source == U('/')) { 783 *source_ = source + 1; 784 return copy_space_comment(source_, target_, ctx, NEED_SPACE_NEVER); 785 } 786 else if (RCSSMIN_IS_SPACE(*source)) { 787 *source_ = source + 1; 788 copy_space(source_, target_, ctx, NEED_SPACE_NEVER); 789 return 0; 790 } 791 792 return -1; 793 } 794 795 796 /* 797 * Copy :first-line|letter 798 */ 799 static void 800 copy_first(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 801 { 802 const rchar *source = *source_, *next, *source_fork; 803 rchar *target = *target_, *target_fork; 804 805 *target++ = U(':'); 806 *target_ = target; 807 808 if (!IMATCH(first, &source, &target, ctx) 809 || !(source < ctx->sentinel && target < ctx->tsentinel)) 810 ABORT; 811 812 source_fork = source; 813 target_fork = target; 814 815 if (!IMATCH(line, &source, &target, ctx)) { 816 source = source_fork; 817 target = target_fork; 818 819 if (!IMATCH(letter, &source, &target, ctx) 820 || !(source < ctx->sentinel && target < ctx->tsentinel)) 821 ABORT; 822 } 823 824 next = skip_space(source, ctx); 825 if (!(next < ctx->sentinel && target < ctx->tsentinel 826 && (*next == U('{') || *next == U(',')))) 827 ABORT; 828 829 *target++ = U(' '); 830 *target_ = target; 831 *source_ = source; 832 (void)copy_space_optional(source_, target_, ctx); 833 } 834 835 836 /* 837 * Copy IE7 hack 838 */ 839 static void 840 copy_ie7hack(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 841 { 842 const rchar *source = *source_; 843 rchar *target = *target_; 844 845 *target++ = U('>'); 846 *target_ = target; 847 848 if (ctx->in_rule || ctx->at_group) 849 return; /* abort */ 850 851 if (!MATCH(ie7, &source, &target, ctx)) 852 ABORT; 853 854 ctx->in_macie5 = 0; 855 856 *target_ = target; 857 *source_ = source; 858 859 (void)copy_space_optional(source_, target_, ctx); 860 } 861 862 863 /* 864 * Copy semicolon; miss out duplicates or even this one (before '}') 865 */ 866 static void 867 copy_semicolon(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) 868 { 869 const rchar *source = *source_, *begin, *end; 870 rchar *target = *target_; 871 872 begin = source; 873 while (source < ctx->sentinel) { 874 end = skip_space(source, ctx); 875 if (!(end < ctx->sentinel)) { 876 if (!(target < ctx->tsentinel)) 877 ABORT; 878 *target++ = U(';'); 879 break; 880 } 881 switch (*end) { 882 case U(';'): 883 source = end + 1; 884 continue; 885 886 case U('}'): 887 if (ctx->in_rule) 888 break; 889 890 /* fall through */ 891 default: 892 if (!(target < ctx->tsentinel)) 893 ABORT; 894 *target++ = U(';'); 895 break; 896 } 897 898 break; 899 } 900 901 source = begin; 902 *target_ = target; 903 while (source < ctx->sentinel) { 904 if (*source == U(';')) { 905 ++source; 906 continue; 907 } 908 909 if (copy_space_optional(&source, target_, ctx) == 0) 910 continue; 911 912 break; 913 } 914 915 *source_ = source; 916 } 917 918 919 /* 920 * Main function 921 * 922 * The return value determines the result length (kept in the target buffer). 923 * However, if the target buffer is too small, the return value is greater 924 * than tlength. The difference to tlength is the number of unconsumed source 925 * characters at the time the buffer was full. In this case you should resize 926 * the target buffer to the return value and call rcssmin again. Repeat as 927 * often as needed. 928 */ 929 static Py_ssize_t 930 rcssmin(const rchar *source, rchar *target, Py_ssize_t slength, 931 Py_ssize_t tlength, int keep_bang_comments) 932 { 933 rcssmin_ctx_t ctx_, *ctx = &ctx_; 934 const rchar *tstart = target; 935 rchar c; 936 937 ctx->start = source; 938 ctx->sentinel = source + slength; 939 ctx->tsentinel = target + tlength; 940 ctx->at_group = 0; 941 ctx->in_macie5 = 0; 942 ctx->in_rule = 0; 943 ctx->keep_bang_comments = keep_bang_comments; 944 945 while (source < ctx->sentinel && target < ctx->tsentinel) { 946 c = *source++; 947 if (RCSSMIN_IS_DULL(c)) { 948 *target++ = c; 949 continue; 950 } 951 else if (RCSSMIN_IS_SPACE(c)) { 952 copy_space(&source, &target, ctx, NEED_SPACE_MAYBE); 953 continue; 954 } 955 956 switch (c) { 957 958 /* Escape */ 959 case U('\\'): 960 copy_escape(&source, &target, ctx); 961 continue; 962 963 /* String */ 964 case U('"'): case U('\''): 965 copy_string(&source, &target, ctx); 966 continue; 967 968 /* URL */ 969 case U('u'): 970 copy_url(&source, &target, ctx); 971 continue; 972 973 /* IE7hack */ 974 case U('>'): 975 copy_ie7hack(&source, &target, ctx); 976 continue; 977 978 /* @-group */ 979 case U('@'): 980 copy_at_group(&source, &target, ctx); 981 continue; 982 983 /* ; */ 984 case U(';'): 985 copy_semicolon(&source, &target, ctx); 986 continue; 987 988 /* :first-line|letter followed by [{,] */ 989 /* (apparently needed for IE6) */ 990 case U(':'): 991 copy_first(&source, &target, ctx); 992 continue; 993 994 /* { */ 995 case U('{'): 996 if (ctx->at_group) 997 --ctx->at_group; 998 else 999 ++ctx->in_rule; 1000 *target++ = c; 1001 continue; 1002 1003 /* } */ 1004 case U('}'): 1005 if (ctx->in_rule) 1006 --ctx->in_rule; 1007 *target++ = c; 1008 continue; 1009 1010 /* space starting with comment */ 1011 case U('/'): 1012 (void)copy_space_comment(&source, &target, ctx, NEED_SPACE_MAYBE); 1013 continue; 1014 1015 /* Fallback: copy character. Better safe than sorry. Should not be 1016 * reached, though */ 1017 default: 1018 *target++ = c; 1019 continue; 1020 } 1021 } 1022 1023 return 1024 (Py_ssize_t)(target - tstart) + (Py_ssize_t)(ctx->sentinel - source); 1025 } 1026 1027 1028 PyDoc_STRVAR(rcssmin_cssmin__doc__, 1029 "cssmin(style, keep_bang_comments=False)\n\ 1030 \n\ 1031 Minify CSS.\n\ 1032 \n\ 1033 :Note: This is a hand crafted C implementation built on the regex\n\ 1034 semantics.\n\ 1035 \n\ 1036 :Parameters:\n\ 1037 `style` : ``str``\n\ 1038 CSS to minify\n\ 1039 \n\ 1040 :Return: Minified style\n\ 1041 :Rtype: ``str``"); 1042 1043 static PyObject * 1044 rcssmin_cssmin(PyObject *self, PyObject *args, PyObject *kwds) 1045 { 1046 PyObject *style, *keep_bang_comments_ = NULL, *result; 1047 static char *kwlist[] = {"style", "keep_bang_comments", NULL}; 1048 Py_ssize_t rlength, slength, length; 1049 int keep_bang_comments; 1050 #ifdef EXT2 1051 int uni; 1052 #define UOBJ "O" 1053 #endif 1054 #ifdef EXT3 1055 #define UOBJ "U" 1056 #endif 1057 1058 if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist, 1059 &style, &keep_bang_comments_)) 1060 return NULL; 1061 1062 if (!keep_bang_comments_) 1063 keep_bang_comments = 0; 1064 else { 1065 keep_bang_comments = PyObject_IsTrue(keep_bang_comments_); 1066 if (keep_bang_comments == -1) 1067 return NULL; 1068 } 1069 1070 #ifdef EXT2 1071 if (PyUnicode_Check(style)) { 1072 if (!(style = PyUnicode_AsUTF8String(style))) 1073 return NULL; 1074 uni = 1; 1075 } 1076 else { 1077 if (!(style = PyObject_Str(style))) 1078 return NULL; 1079 uni = 0; 1080 } 1081 #endif 1082 1083 #ifdef EXT3 1084 Py_INCREF(style); 1085 #define PyString_GET_SIZE PyUnicode_GET_SIZE 1086 #define PyString_AS_STRING PyUnicode_AS_UNICODE 1087 #define _PyString_Resize PyUnicode_Resize 1088 #define PyString_FromStringAndSize PyUnicode_FromUnicode 1089 #endif 1090 1091 rlength = slength = PyString_GET_SIZE(style); 1092 1093 again: 1094 if (!(result = PyString_FromStringAndSize(NULL, rlength))) { 1095 Py_DECREF(style); 1096 return NULL; 1097 } 1098 Py_BEGIN_ALLOW_THREADS 1099 length = rcssmin((rchar *)PyString_AS_STRING(style), 1100 (rchar *)PyString_AS_STRING(result), 1101 slength, rlength, keep_bang_comments); 1102 Py_END_ALLOW_THREADS 1103 1104 if (length > rlength) { 1105 Py_DECREF(result); 1106 rlength = length; 1107 goto again; 1108 } 1109 1110 Py_DECREF(style); 1111 if (length < 0) { 1112 Py_DECREF(result); 1113 return NULL; 1114 } 1115 if (length != rlength && _PyString_Resize(&result, length) == -1) 1116 return NULL; 1117 1118 #ifdef EXT2 1119 if (uni) { 1120 style = PyUnicode_DecodeUTF8(PyString_AS_STRING(result), 1121 PyString_GET_SIZE(result), "strict"); 1122 Py_DECREF(result); 1123 if (!style) 1124 return NULL; 1125 result = style; 1126 } 1127 #endif 1128 return result; 1129 } 1130 1131 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */ 1132 1133 EXT_METHODS = { 1134 {"cssmin", 1135 (PyCFunction)rcssmin_cssmin, METH_VARARGS | METH_KEYWORDS, 1136 rcssmin_cssmin__doc__}, 1137 1138 {NULL} /* Sentinel */ 1139 }; 1140 1141 PyDoc_STRVAR(EXT_DOCS_VAR, 1142 "C implementation of rcssmin\n\ 1143 ===========================\n\ 1144 \n\ 1145 C implementation of rcssmin."); 1146 1147 1148 EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR); 1149 1150 EXT_INIT_FUNC { 1151 PyObject *m; 1152 1153 /* Create the module and populate stuff */ 1154 if (!(m = EXT_CREATE(&EXT_DEFINE_VAR))) 1155 EXT_INIT_ERROR(NULL); 1156 1157 EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1"); 1158 EXT_ADD_STRING(m, "__docformat__", "restructuredtext en"); 1159 1160 EXT_INIT_RETURN(m); 1161 } 1162 1163 /* ------------------------- END MODULE DEFINITION ------------------------- */ 1164