Home | History | Annotate | Download | only in rcssmin
      1 /*
      2  * Copyright 2011 - 2014
      3  * André Malo or his licensors, as applicable
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 #include "cext.h"
     19 EXT_INIT_FUNC;
     20 
     21 #ifdef EXT3
     22 typedef Py_UNICODE rchar;
     23 #else
     24 typedef unsigned char rchar;
     25 #endif
     26 #define U(c) ((rchar)(c))
     27 
     28 typedef struct {
     29     const rchar *start;
     30     const rchar *sentinel;
     31     const rchar *tsentinel;
     32     Py_ssize_t at_group;
     33     int in_macie5;
     34     int in_rule;
     35     int keep_bang_comments;
     36 } rcssmin_ctx_t;
     37 
     38 typedef enum {
     39     NEED_SPACE_MAYBE = 0,
     40     NEED_SPACE_NEVER
     41 } need_space_flag;
     42 
     43 
     44 #define RCSSMIN_DULL_BIT         (1 << 0)
     45 #define RCSSMIN_HEX_BIT          (1 << 1)
     46 #define RCSSMIN_ESC_BIT          (1 << 2)
     47 #define RCSSMIN_SPACE_BIT        (1 << 3)
     48 #define RCSSMIN_STRING_DULL_BIT  (1 << 4)
     49 #define RCSSMIN_NMCHAR_BIT       (1 << 5)
     50 #define RCSSMIN_URI_DULL_BIT     (1 << 6)
     51 #define RCSSMIN_PRE_CHAR_BIT     (1 << 7)
     52 #define RCSSMIN_POST_CHAR_BIT    (1 << 8)
     53 
     54 static const unsigned short rcssmin_charmask[128] = {
     55      21,  21,  21,  21,  21,  21,  21,  21,
     56      21,  28,   8,  21,   8,   8,  21,  21,
     57      21,  21,  21,  21,  21,  21,  21,  21,
     58      21,  21,  21,  21,  21,  21,  21,  21,
     59      28, 469,   4,  85,  85,  85,  85,   4,
     60     149, 277,  85, 469, 469, 117,  85,  84,
     61     115, 115, 115, 115, 115, 115, 115, 115,
     62     115, 115, 468, 340,  85, 469, 468,  85,
     63      84, 115, 115, 115, 115, 115, 115, 117,
     64     117, 117, 117, 117, 117, 117, 117, 117,
     65     117, 117, 117, 117, 117, 117, 117, 117,
     66     117, 117, 117, 213,   4, 341,  85, 117,
     67      85, 115, 115, 115, 115, 115, 115, 117,
     68     117, 117, 117, 117, 117, 117, 117, 117,
     69     117, 117, 117, 117, 117, 116, 117, 117,
     70     117, 117, 117, 468,  85, 468,  85,  21
     71 };
     72 
     73 #define RCSSMIN_IS_DULL(c) ((U(c) > 127) || \
     74     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_DULL_BIT))
     75 
     76 #define RCSSMIN_IS_HEX(c) ((U(c) <= 127) && \
     77     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_HEX_BIT))
     78 
     79 #define RCSSMIN_IS_ESC(c) ((U(c) > 127) || \
     80     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_ESC_BIT))
     81 
     82 #define RCSSMIN_IS_SPACE(c) ((U(c) <= 127) && \
     83     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_SPACE_BIT))
     84 
     85 #define RCSSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \
     86     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_STRING_DULL_BIT))
     87 
     88 #define RCSSMIN_IS_NMCHAR(c) ((U(c) > 127) || \
     89     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_NMCHAR_BIT))
     90 
     91 #define RCSSMIN_IS_URI_DULL(c) ((U(c) > 127) || \
     92     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_URI_DULL_BIT))
     93 
     94 #define RCSSMIN_IS_PRE_CHAR(c) ((U(c) <= 127) && \
     95     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_PRE_CHAR_BIT))
     96 
     97 #define RCSSMIN_IS_POST_CHAR(c) ((U(c) <= 127) && \
     98     (rcssmin_charmask[U(c) & 0x7F] & RCSSMIN_POST_CHAR_BIT))
     99 
    100 
    101 static const rchar pattern_url[] = {
    102     /*U('u'),*/ U('r'), U('l'), U('(')
    103 };
    104 
    105 static const rchar pattern_ie7[] = {
    106     /*U('>'),*/ U('/'), U('*'), U('*'), U('/')
    107 };
    108 
    109 static const rchar pattern_media[] = {
    110     U('m'), U('e'), U('d'), U('i'), U('a'),
    111     U('M'), U('E'), U('D'), U('I'), U('A')
    112 };
    113 
    114 static const rchar pattern_document[] = {
    115     U('d'), U('o'), U('c'), U('u'), U('m'), U('e'), U('n'), U('t'),
    116     U('D'), U('O'), U('C'), U('U'), U('M'), U('E'), U('N'), U('T')
    117 };
    118 
    119 static const rchar pattern_supports[] = {
    120     U('s'), U('u'), U('p'), U('p'), U('o'), U('r'), U('t'), U('s'),
    121     U('S'), U('U'), U('P'), U('P'), U('O'), U('R'), U('T'), U('S')
    122 };
    123 
    124 static const rchar pattern_keyframes[] = {
    125     U('k'), U('e'), U('y'), U('f'), U('r'), U('a'), U('m'), U('e'), U('s'),
    126     U('K'), U('E'), U('Y'), U('F'), U('R'), U('A'), U('M'), U('E'), U('S')
    127 };
    128 
    129 static const rchar pattern_vendor_o[] = {
    130     U('-'), U('o'), U('-'),
    131     U('-'), U('O'), U('-')
    132 };
    133 
    134 static const rchar pattern_vendor_moz[] = {
    135     U('-'), U('m'), U('o'), U('z'), U('-'),
    136     U('-'), U('M'), U('O'), U('Z'), U('-')
    137 };
    138 
    139 static const rchar pattern_vendor_webkit[] = {
    140     U('-'), U('w'), U('e'), U('b'), U('k'), U('i'), U('t'), U('-'),
    141     U('-'), U('W'), U('E'), U('B'), U('K'), U('I'), U('T'), U('-')
    142 };
    143 
    144 static const rchar pattern_vendor_ms[] = {
    145     U('-'), U('m'), U('s'), U('-'),
    146     U('-'), U('M'), U('S'), U('-')
    147 };
    148 
    149 static const rchar pattern_first[] = {
    150     U('f'), U('i'), U('r'), U('s'), U('t'), U('-'), U('l'),
    151     U('F'), U('I'), U('R'), U('S'), U('T'), U('-'), U('L')
    152 };
    153 
    154 static const rchar pattern_line[] = {
    155     U('i'), U('n'), U('e'),
    156     U('I'), U('N'), U('E'),
    157 };
    158 
    159 static const rchar pattern_letter[] = {
    160     U('e'), U('t'), U('t'), U('e'), U('r'),
    161     U('E'), U('T'), U('T'), U('E'), U('R')
    162 };
    163 
    164 static const rchar pattern_macie5_init[] = {
    165     U('/'), U('*'), U('\\'), U('*'), U('/')
    166 };
    167 
    168 static const rchar pattern_macie5_exit[] = {
    169     U('/'), U('*'), U('*'), U('/')
    170 };
    171 
    172 /*
    173  * Match a pattern (and copy immediately to target)
    174  */
    175 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
    176 #pragma GCC diagnostic push
    177 #pragma GCC diagnostic ignored "-Wstrict-overflow"
    178 #endif
    179 static int
    180 copy_match(const rchar *pattern, const rchar *psentinel,
    181            const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    182 {
    183     const rchar *source = *source_;
    184     rchar *target = *target_;
    185     rchar c;
    186 
    187     while (pattern < psentinel
    188            && source < ctx->sentinel && target < ctx->tsentinel
    189            && ((c = *source++) == *pattern++))
    190         *target++ = c;
    191 
    192     *source_ = source;
    193     *target_ = target;
    194 
    195     return (pattern == psentinel);
    196 }
    197 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
    198 #pragma GCC diagnostic pop
    199 #endif
    200 
    201 #define MATCH(PAT, source, target, ctx) (                              \
    202     copy_match(pattern_##PAT,                                          \
    203                pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar),  \
    204                source, target, ctx)                                    \
    205 )
    206 
    207 
    208 /*
    209  * Match a pattern (and copy immediately to target) - CI version
    210  */
    211 static int
    212 copy_imatch(const rchar *pattern, const rchar *psentinel,
    213             const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    214 {
    215     const rchar *source = *source_, *pstart = pattern;
    216     rchar *target = *target_;
    217     rchar c;
    218 
    219     while (pattern < psentinel
    220            && source < ctx->sentinel && target < ctx->tsentinel
    221            && ((c = *source++) == *pattern
    222                || c == pstart[(pattern - pstart) + (psentinel - pstart)])) {
    223         ++pattern;
    224         *target++ = c;
    225     }
    226 
    227     *source_ = source;
    228     *target_ = target;
    229 
    230     return (pattern == psentinel);
    231 }
    232 
    233 #define IMATCH(PAT, source, target, ctx) (                                  \
    234     copy_imatch(pattern_##PAT,                                              \
    235                 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar) / 2,  \
    236                 source, target, ctx)                                        \
    237 )
    238 
    239 
    240 /*
    241  * Copy characters
    242  */
    243 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
    244 #pragma GCC diagnostic push
    245 #pragma GCC diagnostic ignored "-Wstrict-overflow"
    246 #endif
    247 static int
    248 copy(const rchar *source, const rchar *sentinel, rchar **target_,
    249      rcssmin_ctx_t *ctx)
    250 {
    251     rchar *target = *target_;
    252 
    253     while (source < sentinel && target < ctx->tsentinel)
    254         *target++ = *source++;
    255 
    256     *target_ = target;
    257 
    258     return (source == sentinel);
    259 }
    260 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
    261 #pragma GCC diagnostic pop
    262 #endif
    263 
    264 #define COPY_PAT(PAT, target, ctx) (                             \
    265     copy(pattern_##PAT,                                          \
    266          pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar),  \
    267          target, ctx)                                            \
    268 )
    269 
    270 
    271 /*
    272  * The ABORT macros work with known local variables!
    273  */
    274 #define ABORT_(RET) do {                                         \
    275     if (source < ctx->sentinel && !(target < ctx->tsentinel)) {  \
    276         *source_ = source;                                       \
    277         *target_ = target;                                       \
    278     }                                                            \
    279     return RET;                                                  \
    280 } while(0)
    281 
    282 
    283 #define CRAPPY_C90_COMPATIBLE_EMPTY
    284 #define ABORT ABORT_(CRAPPY_C90_COMPATIBLE_EMPTY)
    285 #define RABORT(RET) ABORT_((RET))
    286 
    287 
    288 /*
    289  * Copy escape
    290  */
    291 static void
    292 copy_escape(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    293 {
    294     const rchar *source = *source_, *hsentinel;
    295     rchar *target = *target_;
    296     rchar c;
    297 
    298     *target++ = U('\\');
    299     *target_ = target;
    300 
    301     if (source < ctx->sentinel && target < ctx->tsentinel) {
    302         c = *source++;
    303         if (RCSSMIN_IS_ESC(c)) {
    304             *target++ = c;
    305         }
    306         else if (RCSSMIN_IS_HEX(c)) {
    307             *target++ = c;
    308 
    309             /* 6 hex chars max, one we got already */
    310             if (ctx->sentinel - source > 5)
    311                 hsentinel = source + 5;
    312             else
    313                 hsentinel = ctx->sentinel;
    314 
    315             while (source < hsentinel && target < ctx->tsentinel
    316                    && (c = *source, RCSSMIN_IS_HEX(c))) {
    317                 ++source;
    318                 *target++ = c;
    319             }
    320 
    321             /* One optional space after */
    322             if (source < ctx->sentinel && target < ctx->tsentinel) {
    323                 if (source == hsentinel)
    324                     c = *source;
    325                 if (RCSSMIN_IS_SPACE(c)) {
    326                     ++source;
    327                     *target++ = U(' ');
    328                     if (c == U('\r') && source < ctx->sentinel
    329                         && *source == U('\n'))
    330                         ++source;
    331                 }
    332             }
    333         }
    334     }
    335 
    336     *target_ = target;
    337     *source_ = source;
    338 }
    339 
    340 
    341 /*
    342  * Copy string
    343  */
    344 static void
    345 copy_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    346 {
    347     const rchar *source = *source_;
    348     rchar *target = *target_;
    349     rchar c, quote = source[-1];
    350 
    351     *target++ = quote;
    352     *target_ = target;
    353 
    354     while (source < ctx->sentinel && target < ctx->tsentinel) {
    355         c = *target++ = *source++;
    356         if (RCSSMIN_IS_STRING_DULL(c))
    357             continue;
    358 
    359         switch (c) {
    360         case U('\''): case U('"'):
    361             if (c == quote) {
    362                 *target_ = target;
    363                 *source_ = source;
    364                 return;
    365             }
    366             continue;
    367 
    368         case U('\\'):
    369             if (source < ctx->sentinel && target < ctx->tsentinel) {
    370                 c = *source++;
    371                 switch (c) {
    372                 case U('\r'):
    373                     if (source < ctx->sentinel && *source == U('\n'))
    374                         ++source;
    375                     /* fall through */
    376 
    377                 case U('\n'): case U('\f'):
    378                     --target;
    379                     break;
    380 
    381                 default:
    382                     *target++ = c;
    383                 }
    384             }
    385             continue;
    386         }
    387         break; /* forbidden characters */
    388     }
    389 
    390     ABORT;
    391 }
    392 
    393 
    394 /*
    395  * Copy URI string
    396  */
    397 static int
    398 copy_uri_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    399 {
    400     const rchar *source = *source_;
    401     rchar *target = *target_;
    402     rchar c, quote = source[-1];
    403 
    404     *target++ = quote;
    405     *target_ = target;
    406 
    407     while (source < ctx->sentinel && target < ctx->tsentinel) {
    408         c = *source++;
    409         if (RCSSMIN_IS_SPACE(c))
    410             continue;
    411         *target++ = c;
    412         if (RCSSMIN_IS_STRING_DULL(c))
    413             continue;
    414 
    415         switch (c) {
    416         case U('\''): case U('"'):
    417             if (c == quote) {
    418                 *target_ = target;
    419                 *source_ = source;
    420                 return 0;
    421             }
    422             continue;
    423 
    424         case U('\\'):
    425             if (source < ctx->sentinel && target < ctx->tsentinel) {
    426                 c = *source;
    427                 switch (c) {
    428                 case U('\r'):
    429                     if ((source + 1) < ctx->sentinel && source[1] == U('\n'))
    430                         ++source;
    431                     /* fall through */
    432 
    433                 case U('\n'): case U('\f'):
    434                     --target;
    435                     ++source;
    436                     break;
    437 
    438                 default:
    439                     --target;
    440                     copy_escape(&source, &target, ctx);
    441                 }
    442             }
    443             continue;
    444         }
    445 
    446         break; /* forbidden characters */
    447     }
    448 
    449     RABORT(-1);
    450 }
    451 
    452 
    453 /*
    454  * Copy URI (unquoted)
    455  */
    456 static int
    457 copy_uri_unquoted(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    458 {
    459     const rchar *source = *source_;
    460     rchar *target = *target_;
    461     rchar c;
    462 
    463     *target++ = source[-1];
    464     *target_ = target;
    465 
    466     while (source < ctx->sentinel && target < ctx->tsentinel) {
    467         c = *source++;
    468         if (RCSSMIN_IS_SPACE(c))
    469             continue;
    470         *target++ = c;
    471         if (RCSSMIN_IS_URI_DULL(c))
    472             continue;
    473 
    474         switch (c) {
    475 
    476         case U(')'):
    477             *target_ = target - 1;
    478             *source_ = source - 1;
    479             return 0;
    480 
    481         case U('\\'):
    482             if (source < ctx->sentinel && target < ctx->tsentinel) {
    483                 c = *source;
    484                 switch (c) {
    485                 case U('\r'):
    486                     if ((source + 1) < ctx->sentinel && source[1] == U('\n'))
    487                         ++source;
    488                     /* fall through */
    489 
    490                 case U('\n'): case U('\f'):
    491                     --target;
    492                     ++source;
    493                     break;
    494 
    495                 default:
    496                     --target;
    497                     copy_escape(&source, &target, ctx);
    498                 }
    499             }
    500             continue;
    501         }
    502 
    503         break; /* forbidden characters */
    504     }
    505 
    506     RABORT(-1);
    507 }
    508 
    509 
    510 /*
    511  * Copy url
    512  */
    513 static void
    514 copy_url(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    515 {
    516     const rchar *source = *source_;
    517     rchar *target = *target_;
    518     rchar c;
    519 
    520     *target++ = U('u');
    521     *target_ = target;
    522 
    523     /* Must not be inside an identifier */
    524     if ((source != ctx->start + 1) && RCSSMIN_IS_NMCHAR(source[-2]))
    525         return;
    526 
    527     if (!MATCH(url, &source, &target, ctx)
    528         || !(source < ctx->sentinel && target < ctx->tsentinel))
    529         ABORT;
    530 
    531     while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source))
    532         ++source;
    533 
    534     if (!(source < ctx->sentinel))
    535         ABORT;
    536 
    537     c = *source++;
    538     switch (c) {
    539     case U('"'): case U('\''):
    540         if (copy_uri_string(&source, &target, ctx) == -1)
    541             ABORT;
    542 
    543         while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source))
    544             ++source;
    545         break;
    546 
    547     default:
    548         if (copy_uri_unquoted(&source, &target, ctx) == -1)
    549             ABORT;
    550     }
    551 
    552     if (!(source < ctx->sentinel && target < ctx->tsentinel))
    553         ABORT;
    554 
    555     if ((*target++ = *source++) != U(')'))
    556         ABORT;
    557 
    558     *target_ = target;
    559     *source_ = source;
    560 }
    561 
    562 
    563 /*
    564  * Copy @-group
    565  */
    566 static void
    567 copy_at_group(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    568 {
    569     const rchar *source = *source_;
    570     rchar *target = *target_;
    571 
    572     *target++ = U('@');
    573     *target_ = target;
    574 
    575 #define REMATCH(what) ( \
    576     source = *source_, \
    577     target = *target_, \
    578     IMATCH(what, &source, &target, ctx) \
    579 )
    580 #define CMATCH(what) IMATCH(what, &source, &target, ctx)
    581 
    582     if ((  !CMATCH(media)
    583         && !REMATCH(supports)
    584         && !REMATCH(document)
    585         && !REMATCH(keyframes)
    586         && !(REMATCH(vendor_webkit) && CMATCH(keyframes))
    587         && !(REMATCH(vendor_moz) && CMATCH(keyframes))
    588         && !(REMATCH(vendor_o) && CMATCH(keyframes))
    589         && !(REMATCH(vendor_ms) && CMATCH(keyframes)))
    590         || !(source < ctx->sentinel && target < ctx->tsentinel)
    591         || RCSSMIN_IS_NMCHAR(*source))
    592         ABORT;
    593 
    594 #undef CMATCH
    595 #undef REMATCH
    596 
    597     ++ctx->at_group;
    598 
    599     *target_ = target;
    600     *source_ = source;
    601 }
    602 
    603 
    604 /*
    605  * Skip space
    606  */
    607 static const rchar *
    608 skip_space(const rchar *source, rcssmin_ctx_t *ctx)
    609 {
    610     const rchar *begin = source;
    611     int res;
    612     rchar c;
    613 
    614     while (source < ctx->sentinel) {
    615         c = *source;
    616         if (RCSSMIN_IS_SPACE(c)) {
    617             ++source;
    618             continue;
    619         }
    620         else if (c == U('/')) {
    621             ++source;
    622             if (!(source < ctx->sentinel && *source == U('*'))) {
    623                 --source;
    624                 break;
    625             }
    626             ++source;
    627             res = 0;
    628             while (source < ctx->sentinel) {
    629                 c = *source++;
    630                 if (c != U('*'))
    631                     continue;
    632                 if (!(source < ctx->sentinel))
    633                     return begin;
    634                 if (*source != U('/'))
    635                     continue;
    636 
    637                 /* Comment complete */
    638                 ++source;
    639                 res = 1;
    640                 break;
    641             }
    642             if (!res)
    643                 return begin;
    644 
    645             continue;
    646         }
    647 
    648         break;
    649     }
    650 
    651     return source;
    652 }
    653 
    654 
    655 /*
    656  * Copy space
    657  */
    658 static void
    659 copy_space(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx,
    660            need_space_flag need_space)
    661 {
    662     const rchar *source = *source_, *end, *comment;
    663     rchar *target = *target_;
    664     int res;
    665     rchar c;
    666 
    667     --source;
    668     if (need_space == NEED_SPACE_MAYBE
    669         && source > ctx->start
    670         && !RCSSMIN_IS_PRE_CHAR(source[-1])
    671         && (end = skip_space(source, ctx)) < ctx->sentinel
    672         && (!RCSSMIN_IS_POST_CHAR(*end)
    673             || (*end == U(':') && !ctx->in_rule && !ctx->at_group))) {
    674 
    675         if (!(target < ctx->tsentinel))
    676             ABORT;
    677         *target++ = U(' ');
    678     }
    679 
    680     while (source < ctx->sentinel) {
    681         switch (c = *source) {
    682 
    683         /* comment */
    684         case U('/'):
    685             comment = source++;
    686             if (!((source < ctx->sentinel && *source == U('*')))) {
    687                 --source;
    688                 break;
    689             }
    690             ++source;
    691             res = 0;
    692             while (source < ctx->sentinel) {
    693                 c = *source++;
    694                 if (c != U('*'))
    695                     continue;
    696                 if (!(source < ctx->sentinel))
    697                     ABORT;
    698                 if (*source != U('/'))
    699                     continue;
    700 
    701                 /* Comment complete */
    702                 ++source;
    703                 res = 1;
    704 
    705                 if (ctx->keep_bang_comments && comment[2] == U('!')) {
    706                     ctx->in_macie5 = (source[-3] == U('\\'));
    707                     if (!copy(comment, source, &target, ctx))
    708                         ABORT;
    709                 }
    710                 else if (source[-3] == U('\\')) {
    711                     if (!ctx->in_macie5) {
    712                         if (!COPY_PAT(macie5_init, &target, ctx))
    713                             ABORT;
    714                     }
    715                     ctx->in_macie5 = 1;
    716                 }
    717                 else if (ctx->in_macie5) {
    718                     if (!COPY_PAT(macie5_exit, &target, ctx))
    719                         ABORT;
    720                     ctx->in_macie5 = 0;
    721                 }
    722                 /* else don't copy anything */
    723                 break;
    724             }
    725             if (!res)
    726                 ABORT;
    727             continue;
    728 
    729         /* space */
    730         case U(' '): case U('\t'): case U('\r'): case U('\n'): case U('\f'):
    731             ++source;
    732             continue;
    733         }
    734 
    735         break;
    736     }
    737 
    738     *source_ = source;
    739     *target_ = target;
    740 }
    741 
    742 
    743 /*
    744  * Copy space if comment
    745  */
    746 static int
    747 copy_space_comment(const rchar **source_, rchar **target_,
    748                    rcssmin_ctx_t *ctx, need_space_flag need_space)
    749 {
    750     const rchar *source = *source_;
    751     rchar *target = *target_;
    752 
    753     if (source < ctx->sentinel && *source == U('*')) {
    754         copy_space(source_, target_, ctx, need_space);
    755         if (*source_ > source)
    756             return 0;
    757     }
    758     if (!(target < ctx->tsentinel))
    759         RABORT(-1);
    760 
    761     *target++ = source[-1];
    762 
    763     /* *source_ = source; <-- unchanged */
    764     *target_ = target;
    765 
    766     return -1;
    767 }
    768 
    769 
    770 /*
    771  * Copy space if exists
    772  */
    773 static int
    774 copy_space_optional(const rchar **source_, rchar **target_,
    775                     rcssmin_ctx_t *ctx)
    776 {
    777     const rchar *source = *source_;
    778 
    779     if (!(source < ctx->sentinel))
    780         return -1;
    781 
    782     if (*source == U('/')) {
    783         *source_ = source + 1;
    784         return copy_space_comment(source_, target_, ctx, NEED_SPACE_NEVER);
    785     }
    786     else if (RCSSMIN_IS_SPACE(*source)) {
    787         *source_ = source + 1;
    788         copy_space(source_, target_, ctx, NEED_SPACE_NEVER);
    789         return 0;
    790     }
    791 
    792     return -1;
    793 }
    794 
    795 
    796 /*
    797  * Copy :first-line|letter
    798  */
    799 static void
    800 copy_first(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    801 {
    802     const rchar *source = *source_, *next, *source_fork;
    803     rchar *target = *target_, *target_fork;
    804 
    805     *target++ = U(':');
    806     *target_ = target;
    807 
    808     if (!IMATCH(first, &source, &target, ctx)
    809         || !(source < ctx->sentinel && target < ctx->tsentinel))
    810         ABORT;
    811 
    812     source_fork = source;
    813     target_fork = target;
    814 
    815     if (!IMATCH(line, &source, &target, ctx)) {
    816         source = source_fork;
    817         target = target_fork;
    818 
    819         if (!IMATCH(letter, &source, &target, ctx)
    820             || !(source < ctx->sentinel && target < ctx->tsentinel))
    821             ABORT;
    822     }
    823 
    824     next = skip_space(source, ctx);
    825     if (!(next < ctx->sentinel && target < ctx->tsentinel
    826         && (*next == U('{') || *next == U(','))))
    827         ABORT;
    828 
    829     *target++ = U(' ');
    830     *target_ = target;
    831     *source_ = source;
    832     (void)copy_space_optional(source_, target_, ctx);
    833 }
    834 
    835 
    836 /*
    837  * Copy IE7 hack
    838  */
    839 static void
    840 copy_ie7hack(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    841 {
    842     const rchar *source = *source_;
    843     rchar *target = *target_;
    844 
    845     *target++ = U('>');
    846     *target_ = target;
    847 
    848     if (ctx->in_rule || ctx->at_group)
    849         return; /* abort */
    850 
    851     if (!MATCH(ie7, &source, &target, ctx))
    852         ABORT;
    853 
    854     ctx->in_macie5 = 0;
    855 
    856     *target_ = target;
    857     *source_ = source;
    858 
    859     (void)copy_space_optional(source_, target_, ctx);
    860 }
    861 
    862 
    863 /*
    864  * Copy semicolon; miss out duplicates or even this one (before '}')
    865  */
    866 static void
    867 copy_semicolon(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
    868 {
    869     const rchar *source = *source_, *begin, *end;
    870     rchar *target = *target_;
    871 
    872     begin = source;
    873     while (source < ctx->sentinel) {
    874         end = skip_space(source, ctx);
    875         if (!(end < ctx->sentinel)) {
    876             if (!(target < ctx->tsentinel))
    877                 ABORT;
    878             *target++ = U(';');
    879             break;
    880         }
    881         switch (*end) {
    882         case U(';'):
    883             source = end + 1;
    884             continue;
    885 
    886         case U('}'):
    887             if (ctx->in_rule)
    888                 break;
    889 
    890             /* fall through */
    891         default:
    892             if (!(target < ctx->tsentinel))
    893                 ABORT;
    894             *target++ = U(';');
    895             break;
    896         }
    897 
    898         break;
    899     }
    900 
    901     source = begin;
    902     *target_ = target;
    903     while (source < ctx->sentinel) {
    904         if (*source == U(';')) {
    905             ++source;
    906             continue;
    907         }
    908 
    909         if (copy_space_optional(&source, target_, ctx) == 0)
    910             continue;
    911 
    912         break;
    913     }
    914 
    915     *source_ = source;
    916 }
    917 
    918 
    919 /*
    920  * Main function
    921  *
    922  * The return value determines the result length (kept in the target buffer).
    923  * However, if the target buffer is too small, the return value is greater
    924  * than tlength. The difference to tlength is the number of unconsumed source
    925  * characters at the time the buffer was full. In this case you should resize
    926  * the target buffer to the return value and call rcssmin again. Repeat as
    927  * often as needed.
    928  */
    929 static Py_ssize_t
    930 rcssmin(const rchar *source, rchar *target, Py_ssize_t slength,
    931         Py_ssize_t tlength, int keep_bang_comments)
    932 {
    933     rcssmin_ctx_t ctx_, *ctx = &ctx_;
    934     const rchar *tstart = target;
    935     rchar c;
    936 
    937     ctx->start = source;
    938     ctx->sentinel = source + slength;
    939     ctx->tsentinel = target + tlength;
    940     ctx->at_group = 0;
    941     ctx->in_macie5 = 0;
    942     ctx->in_rule = 0;
    943     ctx->keep_bang_comments = keep_bang_comments;
    944 
    945     while (source < ctx->sentinel && target < ctx->tsentinel) {
    946         c = *source++;
    947         if (RCSSMIN_IS_DULL(c)) {
    948             *target++ = c;
    949             continue;
    950         }
    951         else if (RCSSMIN_IS_SPACE(c)) {
    952             copy_space(&source, &target, ctx, NEED_SPACE_MAYBE);
    953             continue;
    954         }
    955 
    956         switch (c) {
    957 
    958         /* Escape */
    959         case U('\\'):
    960             copy_escape(&source, &target, ctx);
    961             continue;
    962 
    963         /* String */
    964         case U('"'): case U('\''):
    965             copy_string(&source, &target, ctx);
    966             continue;
    967 
    968         /* URL */
    969         case U('u'):
    970             copy_url(&source, &target, ctx);
    971             continue;
    972 
    973         /* IE7hack */
    974         case U('>'):
    975             copy_ie7hack(&source, &target, ctx);
    976             continue;
    977 
    978         /* @-group */
    979         case U('@'):
    980             copy_at_group(&source, &target, ctx);
    981             continue;
    982 
    983         /* ; */
    984         case U(';'):
    985             copy_semicolon(&source, &target, ctx);
    986             continue;
    987 
    988         /* :first-line|letter followed by [{,] */
    989         /* (apparently needed for IE6) */
    990         case U(':'):
    991             copy_first(&source, &target, ctx);
    992             continue;
    993 
    994         /* { */
    995         case U('{'):
    996             if (ctx->at_group)
    997                 --ctx->at_group;
    998             else
    999                 ++ctx->in_rule;
   1000             *target++ = c;
   1001             continue;
   1002 
   1003         /* } */
   1004         case U('}'):
   1005             if (ctx->in_rule)
   1006                 --ctx->in_rule;
   1007             *target++ = c;
   1008             continue;
   1009 
   1010         /* space starting with comment */
   1011         case U('/'):
   1012             (void)copy_space_comment(&source, &target, ctx, NEED_SPACE_MAYBE);
   1013             continue;
   1014 
   1015         /* Fallback: copy character. Better safe than sorry. Should not be
   1016          * reached, though */
   1017         default:
   1018             *target++ = c;
   1019             continue;
   1020         }
   1021     }
   1022 
   1023     return
   1024         (Py_ssize_t)(target - tstart) + (Py_ssize_t)(ctx->sentinel - source);
   1025 }
   1026 
   1027 
   1028 PyDoc_STRVAR(rcssmin_cssmin__doc__,
   1029 "cssmin(style, keep_bang_comments=False)\n\
   1030 \n\
   1031 Minify CSS.\n\
   1032 \n\
   1033 :Note: This is a hand crafted C implementation built on the regex\n\
   1034        semantics.\n\
   1035 \n\
   1036 :Parameters:\n\
   1037   `style` : ``str``\n\
   1038     CSS to minify\n\
   1039 \n\
   1040 :Return: Minified style\n\
   1041 :Rtype: ``str``");
   1042 
   1043 static PyObject *
   1044 rcssmin_cssmin(PyObject *self, PyObject *args, PyObject *kwds)
   1045 {
   1046     PyObject *style, *keep_bang_comments_ = NULL, *result;
   1047     static char *kwlist[] = {"style", "keep_bang_comments", NULL};
   1048     Py_ssize_t rlength, slength, length;
   1049     int keep_bang_comments;
   1050 #ifdef EXT2
   1051     int uni;
   1052 #define UOBJ "O"
   1053 #endif
   1054 #ifdef EXT3
   1055 #define UOBJ "U"
   1056 #endif
   1057 
   1058     if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist,
   1059                                      &style, &keep_bang_comments_))
   1060         return NULL;
   1061 
   1062     if (!keep_bang_comments_)
   1063         keep_bang_comments = 0;
   1064     else {
   1065         keep_bang_comments = PyObject_IsTrue(keep_bang_comments_);
   1066         if (keep_bang_comments == -1)
   1067             return NULL;
   1068     }
   1069 
   1070 #ifdef EXT2
   1071     if (PyUnicode_Check(style)) {
   1072         if (!(style = PyUnicode_AsUTF8String(style)))
   1073             return NULL;
   1074         uni = 1;
   1075     }
   1076     else {
   1077         if (!(style = PyObject_Str(style)))
   1078             return NULL;
   1079         uni = 0;
   1080     }
   1081 #endif
   1082 
   1083 #ifdef EXT3
   1084     Py_INCREF(style);
   1085 #define PyString_GET_SIZE PyUnicode_GET_SIZE
   1086 #define PyString_AS_STRING PyUnicode_AS_UNICODE
   1087 #define _PyString_Resize PyUnicode_Resize
   1088 #define PyString_FromStringAndSize PyUnicode_FromUnicode
   1089 #endif
   1090 
   1091     rlength = slength = PyString_GET_SIZE(style);
   1092 
   1093 again:
   1094     if (!(result = PyString_FromStringAndSize(NULL, rlength))) {
   1095         Py_DECREF(style);
   1096         return NULL;
   1097     }
   1098     Py_BEGIN_ALLOW_THREADS
   1099     length = rcssmin((rchar *)PyString_AS_STRING(style),
   1100                      (rchar *)PyString_AS_STRING(result),
   1101                      slength, rlength, keep_bang_comments);
   1102     Py_END_ALLOW_THREADS
   1103 
   1104     if (length > rlength) {
   1105         Py_DECREF(result);
   1106         rlength = length;
   1107         goto again;
   1108     }
   1109 
   1110     Py_DECREF(style);
   1111     if (length < 0) {
   1112         Py_DECREF(result);
   1113         return NULL;
   1114     }
   1115     if (length != rlength && _PyString_Resize(&result, length) == -1)
   1116         return NULL;
   1117 
   1118 #ifdef EXT2
   1119     if (uni) {
   1120         style = PyUnicode_DecodeUTF8(PyString_AS_STRING(result),
   1121                                      PyString_GET_SIZE(result), "strict");
   1122         Py_DECREF(result);
   1123         if (!style)
   1124             return NULL;
   1125         result = style;
   1126     }
   1127 #endif
   1128     return result;
   1129 }
   1130 
   1131 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */
   1132 
   1133 EXT_METHODS = {
   1134     {"cssmin",
   1135         (PyCFunction)rcssmin_cssmin, METH_VARARGS | METH_KEYWORDS,
   1136         rcssmin_cssmin__doc__},
   1137 
   1138     {NULL}  /* Sentinel */
   1139 };
   1140 
   1141 PyDoc_STRVAR(EXT_DOCS_VAR,
   1142 "C implementation of rcssmin\n\
   1143 ===========================\n\
   1144 \n\
   1145 C implementation of rcssmin.");
   1146 
   1147 
   1148 EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR);
   1149 
   1150 EXT_INIT_FUNC {
   1151     PyObject *m;
   1152 
   1153     /* Create the module and populate stuff */
   1154     if (!(m = EXT_CREATE(&EXT_DEFINE_VAR)))
   1155         EXT_INIT_ERROR(NULL);
   1156 
   1157     EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1");
   1158     EXT_ADD_STRING(m, "__docformat__", "restructuredtext en");
   1159 
   1160     EXT_INIT_RETURN(m);
   1161 }
   1162 
   1163 /* ------------------------- END MODULE DEFINITION ------------------------- */
   1164