Home | History | Annotate | Download | only in Modules
      1 /*
      2 ** Routines to represent binary data in ASCII and vice-versa
      3 **
      4 ** This module currently supports the following encodings:
      5 ** uuencode:
      6 **      each line encodes 45 bytes (except possibly the last)
      7 **      First char encodes (binary) length, rest data
      8 **      each char encodes 6 bits, as follows:
      9 **      binary: 01234567 abcdefgh ijklmnop
     10 **      ascii:  012345 67abcd efghij klmnop
     11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
     12 **      short binary data is zero-extended (so the bits are always in the
     13 **      right place), this does *not* reflect in the length.
     14 ** base64:
     15 **      Line breaks are insignificant, but lines are at most 76 chars
     16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
     17 **      is done via a table.
     18 **      Short binary data is filled (in ASCII) with '='.
     19 ** hqx:
     20 **      File starts with introductory text, real data starts and ends
     21 **      with colons.
     22 **      Data consists of three similar parts: info, datafork, resourcefork.
     23 **      Each part is protected (at the end) with a 16-bit crc
     24 **      The binary data is run-length encoded, and then ascii-fied:
     25 **      binary: 01234567 abcdefgh ijklmnop
     26 **      ascii:  012345 67abcd efghij klmnop
     27 **      ASCII encoding is table-driven, see the code.
     28 **      Short binary data results in the runt ascii-byte being output with
     29 **      the bits in the right place.
     30 **
     31 ** While I was reading dozens of programs that encode or decode the formats
     32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
     33 **
     34 **      Programs that encode binary data in ASCII are written in
     35 **      such a style that they are as unreadable as possible. Devices used
     36 **      include unnecessary global variables, burying important tables
     37 **      in unrelated sourcefiles, putting functions in include files,
     38 **      using seemingly-descriptive variable names for different purposes,
     39 **      calls to empty subroutines and a host of others.
     40 **
     41 ** I have attempted to break with this tradition, but I guess that that
     42 ** does make the performance sub-optimal. Oh well, too bad...
     43 **
     44 ** Jack Jansen, CWI, July 1995.
     45 **
     46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
     47 ** quoted-printable encoding specifies that non printable characters (anything
     48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
     49 ** of the character.  It also specifies some other behavior to enable 8bit data
     50 ** in a mail message with little difficulty (maximum line sizes, protecting
     51 ** some cases of whitespace, etc).
     52 **
     53 ** Brandon Long, September 2001.
     54 */
     55 
     56 #define PY_SSIZE_T_CLEAN
     57 
     58 #include "Python.h"
     59 #ifdef USE_ZLIB_CRC32
     60 #include "zlib.h"
     61 #endif
     62 
     63 static PyObject *Error;
     64 static PyObject *Incomplete;
     65 
     66 /*
     67 ** hqx lookup table, ascii->binary.
     68 */
     69 
     70 #define RUNCHAR 0x90
     71 
     72 #define DONE 0x7F
     73 #define SKIP 0x7E
     74 #define FAIL 0x7D
     75 
     76 static unsigned char table_a2b_hqx[256] = {
     77 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
     78 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     79 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
     80 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
     81 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
     82 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     83 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
     84 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     85 /*              !     "     #     $     %     &     '   */
     86 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
     87 /*        (     )     *     +     ,     -     .     /   */
     88 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
     89 /*        0     1     2     3     4     5     6     7   */
     90 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
     91 /*        8     9     :     ;     <     =     >     ?   */
     92 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
     93 /*        @     A     B     C     D     E     F     G   */
     94 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
     95 /*        H     I     J     K     L     M     N     O   */
     96 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
     97 /*        P     Q     R     S     T     U     V     W   */
     98 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
     99 /*        X     Y     Z     [     \     ]     ^     _   */
    100 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
    101 /*        `     a     b     c     d     e     f     g   */
    102 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
    103 /*        h     i     j     k     l     m     n     o   */
    104 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
    105 /*        p     q     r     s     t     u     v     w   */
    106 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
    107 /*        x     y     z     {     |     }     ~    ^?   */
    108 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    109 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    110     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    111     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    112     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    113     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    114     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    115     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    116     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    117     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    118     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    119     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    120     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    121     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    122     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    123     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    124     FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    125 };
    126 
    127 static unsigned char table_b2a_hqx[] =
    128 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
    129 
    130 static char table_a2b_base64[] = {
    131     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    132     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    133     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    134     52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    135     -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    136     15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    137     -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    138     41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
    139 };
    140 
    141 #define BASE64_PAD '='
    142 
    143 /* Max binary chunk size; limited only by available memory */
    144 #define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)
    145 
    146 static unsigned char table_b2a_base64[] =
    147 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    148 
    149 
    150 
    151 static unsigned short crctab_hqx[256] = {
    152     0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    153     0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    154     0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    155     0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    156     0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    157     0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    158     0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    159     0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    160     0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    161     0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    162     0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    163     0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    164     0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    165     0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    166     0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    167     0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    168     0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    169     0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    170     0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    171     0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    172     0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    173     0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    174     0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    175     0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    176     0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    177     0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    178     0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    179     0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    180     0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    181     0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    182     0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    183     0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
    184 };
    185 
    186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
    187 
    188 static PyObject *
    189 binascii_a2b_uu(PyObject *self, PyObject *args)
    190 {
    191     Py_buffer pascii;
    192     unsigned char *ascii_data, *bin_data;
    193     int leftbits = 0;
    194     unsigned char this_ch;
    195     unsigned int leftchar = 0;
    196     PyObject *rv;
    197     Py_ssize_t ascii_len, bin_len;
    198 
    199     if ( !PyArg_ParseTuple(args, "s*:a2b_uu", &pascii) )
    200         return NULL;
    201     ascii_data = pascii.buf;
    202     ascii_len = pascii.len;
    203 
    204     assert(ascii_len >= 0);
    205 
    206     /* First byte: binary data length (in bytes) */
    207     bin_len = (*ascii_data++ - ' ') & 077;
    208     ascii_len--;
    209 
    210     /* Allocate the buffer */
    211     if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
    212         PyBuffer_Release(&pascii);
    213         return NULL;
    214     }
    215     bin_data = (unsigned char *)PyString_AS_STRING(rv);
    216 
    217     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
    218         /* XXX is it really best to add NULs if there's no more data */
    219         this_ch = (ascii_len > 0) ? *ascii_data : 0;
    220         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
    221             /*
    222             ** Whitespace. Assume some spaces got eaten at
    223             ** end-of-line. (We check this later)
    224             */
    225             this_ch = 0;
    226         } else {
    227             /* Check the character for legality
    228             ** The 64 in stead of the expected 63 is because
    229             ** there are a few uuencodes out there that use
    230             ** '`' as zero instead of space.
    231             */
    232             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
    233                 PyErr_SetString(Error, "Illegal char");
    234                 PyBuffer_Release(&pascii);
    235                 Py_DECREF(rv);
    236                 return NULL;
    237             }
    238             this_ch = (this_ch - ' ') & 077;
    239         }
    240         /*
    241         ** Shift it in on the low end, and see if there's
    242         ** a byte ready for output.
    243         */
    244         leftchar = (leftchar << 6) | (this_ch);
    245         leftbits += 6;
    246         if ( leftbits >= 8 ) {
    247             leftbits -= 8;
    248             *bin_data++ = (leftchar >> leftbits) & 0xff;
    249             leftchar &= ((1 << leftbits) - 1);
    250             bin_len--;
    251         }
    252     }
    253     /*
    254     ** Finally, check that if there's anything left on the line
    255     ** that it's whitespace only.
    256     */
    257     while( ascii_len-- > 0 ) {
    258         this_ch = *ascii_data++;
    259         /* Extra '`' may be written as padding in some cases */
    260         if ( this_ch != ' ' && this_ch != ' '+64 &&
    261              this_ch != '\n' && this_ch != '\r' ) {
    262             PyErr_SetString(Error, "Trailing garbage");
    263             PyBuffer_Release(&pascii);
    264             Py_DECREF(rv);
    265             return NULL;
    266         }
    267     }
    268     PyBuffer_Release(&pascii);
    269     return rv;
    270 }
    271 
    272 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
    273 
    274 static PyObject *
    275 binascii_b2a_uu(PyObject *self, PyObject *args)
    276 {
    277     Py_buffer pbin;
    278     unsigned char *ascii_data, *bin_data;
    279     int leftbits = 0;
    280     unsigned char this_ch;
    281     unsigned int leftchar = 0;
    282     PyObject *rv;
    283     Py_ssize_t bin_len;
    284 
    285     if ( !PyArg_ParseTuple(args, "s*:b2a_uu", &pbin) )
    286         return NULL;
    287     bin_data = pbin.buf;
    288     bin_len = pbin.len;
    289     if ( bin_len > 45 ) {
    290         /* The 45 is a limit that appears in all uuencode's */
    291         PyErr_SetString(Error, "At most 45 bytes at once");
    292         PyBuffer_Release(&pbin);
    293         return NULL;
    294     }
    295 
    296     /* We're lazy and allocate to much (fixed up later) */
    297     if ( (rv=PyString_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
    298         PyBuffer_Release(&pbin);
    299         return NULL;
    300     }
    301     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
    302 
    303     /* Store the length */
    304     *ascii_data++ = ' ' + (bin_len & 077);
    305 
    306     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
    307         /* Shift the data (or padding) into our buffer */
    308         if ( bin_len > 0 )              /* Data */
    309             leftchar = (leftchar << 8) | *bin_data;
    310         else                            /* Padding */
    311             leftchar <<= 8;
    312         leftbits += 8;
    313 
    314         /* See if there are 6-bit groups ready */
    315         while ( leftbits >= 6 ) {
    316             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
    317             leftbits -= 6;
    318             *ascii_data++ = this_ch + ' ';
    319         }
    320     }
    321     *ascii_data++ = '\n';       /* Append a courtesy newline */
    322 
    323     if (_PyString_Resize(&rv,
    324                        (ascii_data -
    325                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
    326         Py_DECREF(rv);
    327         rv = NULL;
    328     }
    329     PyBuffer_Release(&pbin);
    330     return rv;
    331 }
    332 
    333 
    334 static int
    335 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
    336 {
    337     /* Finds & returns the (num+1)th
    338     ** valid character for base64, or -1 if none.
    339     */
    340 
    341     int ret = -1;
    342     unsigned char c, b64val;
    343 
    344     while ((slen > 0) && (ret == -1)) {
    345         c = *s;
    346         b64val = table_a2b_base64[c & 0x7f];
    347         if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
    348             if (num == 0)
    349                 ret = *s;
    350             num--;
    351         }
    352 
    353         s++;
    354         slen--;
    355     }
    356     return ret;
    357 }
    358 
    359 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
    360 
    361 static PyObject *
    362 binascii_a2b_base64(PyObject *self, PyObject *args)
    363 {
    364     Py_buffer pascii;
    365     unsigned char *ascii_data, *bin_data;
    366     int leftbits = 0;
    367     unsigned char this_ch;
    368     unsigned int leftchar = 0;
    369     PyObject *rv;
    370     Py_ssize_t ascii_len, bin_len;
    371     int quad_pos = 0;
    372 
    373     if ( !PyArg_ParseTuple(args, "s*:a2b_base64", &pascii) )
    374         return NULL;
    375     ascii_data = pascii.buf;
    376     ascii_len = pascii.len;
    377 
    378     assert(ascii_len >= 0);
    379 
    380     if (ascii_len > PY_SSIZE_T_MAX - 3) {
    381         PyBuffer_Release(&pascii);
    382         return PyErr_NoMemory();
    383     }
    384 
    385     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
    386 
    387     /* Allocate the buffer */
    388     if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
    389         PyBuffer_Release(&pascii);
    390         return NULL;
    391     }
    392     bin_data = (unsigned char *)PyString_AS_STRING(rv);
    393     bin_len = 0;
    394 
    395     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
    396         this_ch = *ascii_data;
    397 
    398         if (this_ch > 0x7f ||
    399             this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
    400             continue;
    401 
    402         /* Check for pad sequences and ignore
    403         ** the invalid ones.
    404         */
    405         if (this_ch == BASE64_PAD) {
    406             if ( (quad_pos < 2) ||
    407                  ((quad_pos == 2) &&
    408                   (binascii_find_valid(ascii_data, ascii_len, 1)
    409                    != BASE64_PAD)) )
    410             {
    411                 continue;
    412             }
    413             else {
    414                 /* A pad sequence means no more input.
    415                 ** We've already interpreted the data
    416                 ** from the quad at this point.
    417                 */
    418                 leftbits = 0;
    419                 break;
    420             }
    421         }
    422 
    423         this_ch = table_a2b_base64[*ascii_data];
    424         if ( this_ch == (unsigned char) -1 )
    425             continue;
    426 
    427         /*
    428         ** Shift it in on the low end, and see if there's
    429         ** a byte ready for output.
    430         */
    431         quad_pos = (quad_pos + 1) & 0x03;
    432         leftchar = (leftchar << 6) | (this_ch);
    433         leftbits += 6;
    434 
    435         if ( leftbits >= 8 ) {
    436             leftbits -= 8;
    437             *bin_data++ = (leftchar >> leftbits) & 0xff;
    438             bin_len++;
    439             leftchar &= ((1 << leftbits) - 1);
    440         }
    441     }
    442 
    443     if (leftbits != 0) {
    444         PyBuffer_Release(&pascii);
    445         PyErr_SetString(Error, "Incorrect padding");
    446         Py_DECREF(rv);
    447         return NULL;
    448     }
    449 
    450     /* And set string size correctly. If the result string is empty
    451     ** (because the input was all invalid) return the shared empty
    452     ** string instead; _PyString_Resize() won't do this for us.
    453     */
    454     if (bin_len > 0) {
    455         if (_PyString_Resize(&rv, bin_len) < 0) {
    456             Py_DECREF(rv);
    457             rv = NULL;
    458         }
    459     }
    460     else {
    461         Py_DECREF(rv);
    462         rv = PyString_FromStringAndSize("", 0);
    463     }
    464     PyBuffer_Release(&pascii);
    465     return rv;
    466 }
    467 
    468 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
    469 
    470 static PyObject *
    471 binascii_b2a_base64(PyObject *self, PyObject *args)
    472 {
    473     Py_buffer pbuf;
    474     unsigned char *ascii_data, *bin_data;
    475     int leftbits = 0;
    476     unsigned char this_ch;
    477     unsigned int leftchar = 0;
    478     PyObject *rv;
    479     Py_ssize_t bin_len;
    480 
    481     if ( !PyArg_ParseTuple(args, "s*:b2a_base64", &pbuf) )
    482         return NULL;
    483     bin_data = pbuf.buf;
    484     bin_len = pbuf.len;
    485 
    486     assert(bin_len >= 0);
    487 
    488     if ( bin_len > BASE64_MAXBIN ) {
    489         PyErr_SetString(Error, "Too much data for base64 line");
    490         PyBuffer_Release(&pbuf);
    491         return NULL;
    492     }
    493 
    494     /* We're lazy and allocate too much (fixed up later).
    495        "+3" leaves room for up to two pad characters and a trailing
    496        newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
    497     if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
    498         PyBuffer_Release(&pbuf);
    499         return NULL;
    500     }
    501     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
    502 
    503     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
    504         /* Shift the data into our buffer */
    505         leftchar = (leftchar << 8) | *bin_data;
    506         leftbits += 8;
    507 
    508         /* See if there are 6-bit groups ready */
    509         while ( leftbits >= 6 ) {
    510             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
    511             leftbits -= 6;
    512             *ascii_data++ = table_b2a_base64[this_ch];
    513         }
    514     }
    515     if ( leftbits == 2 ) {
    516         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
    517         *ascii_data++ = BASE64_PAD;
    518         *ascii_data++ = BASE64_PAD;
    519     } else if ( leftbits == 4 ) {
    520         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
    521         *ascii_data++ = BASE64_PAD;
    522     }
    523     *ascii_data++ = '\n';       /* Append a courtesy newline */
    524 
    525     if (_PyString_Resize(&rv,
    526                        (ascii_data -
    527                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
    528         Py_DECREF(rv);
    529         rv = NULL;
    530     }
    531     PyBuffer_Release(&pbuf);
    532     return rv;
    533 }
    534 
    535 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
    536 
    537 static PyObject *
    538 binascii_a2b_hqx(PyObject *self, PyObject *args)
    539 {
    540     Py_buffer pascii;
    541     unsigned char *ascii_data, *bin_data;
    542     int leftbits = 0;
    543     unsigned char this_ch;
    544     unsigned int leftchar = 0;
    545     PyObject *rv;
    546     Py_ssize_t len;
    547     int done = 0;
    548 
    549     if ( !PyArg_ParseTuple(args, "s*:a2b_hqx", &pascii) )
    550         return NULL;
    551     ascii_data = pascii.buf;
    552     len = pascii.len;
    553 
    554     assert(len >= 0);
    555 
    556     if (len > PY_SSIZE_T_MAX - 2) {
    557         PyBuffer_Release(&pascii);
    558         return PyErr_NoMemory();
    559     }
    560 
    561     /* Allocate a string that is too big (fixed later)
    562        Add two to the initial length to prevent interning which
    563        would preclude subsequent resizing.  */
    564     if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL ) {
    565         PyBuffer_Release(&pascii);
    566         return NULL;
    567     }
    568     bin_data = (unsigned char *)PyString_AS_STRING(rv);
    569 
    570     for( ; len > 0 ; len--, ascii_data++ ) {
    571         /* Get the byte and look it up */
    572         this_ch = table_a2b_hqx[*ascii_data];
    573         if ( this_ch == SKIP )
    574             continue;
    575         if ( this_ch == FAIL ) {
    576             PyErr_SetString(Error, "Illegal char");
    577             PyBuffer_Release(&pascii);
    578             Py_DECREF(rv);
    579             return NULL;
    580         }
    581         if ( this_ch == DONE ) {
    582             /* The terminating colon */
    583             done = 1;
    584             break;
    585         }
    586 
    587         /* Shift it into the buffer and see if any bytes are ready */
    588         leftchar = (leftchar << 6) | (this_ch);
    589         leftbits += 6;
    590         if ( leftbits >= 8 ) {
    591             leftbits -= 8;
    592             *bin_data++ = (leftchar >> leftbits) & 0xff;
    593             leftchar &= ((1 << leftbits) - 1);
    594         }
    595     }
    596 
    597     if ( leftbits && !done ) {
    598         PyErr_SetString(Incomplete,
    599                         "String has incomplete number of bytes");
    600         PyBuffer_Release(&pascii);
    601         Py_DECREF(rv);
    602         return NULL;
    603     }
    604     if (_PyString_Resize(&rv,
    605                        (bin_data -
    606                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
    607         Py_DECREF(rv);
    608         rv = NULL;
    609     }
    610     if (rv) {
    611         PyObject *rrv = Py_BuildValue("Oi", rv, done);
    612         PyBuffer_Release(&pascii);
    613         Py_DECREF(rv);
    614         return rrv;
    615     }
    616 
    617     PyBuffer_Release(&pascii);
    618     return NULL;
    619 }
    620 
    621 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
    622 
    623 static PyObject *
    624 binascii_rlecode_hqx(PyObject *self, PyObject *args)
    625 {
    626     Py_buffer pbuf;
    627     unsigned char *in_data, *out_data;
    628     PyObject *rv;
    629     unsigned char ch;
    630     Py_ssize_t in, inend, len;
    631 
    632     if ( !PyArg_ParseTuple(args, "s*:rlecode_hqx", &pbuf) )
    633         return NULL;
    634     in_data = pbuf.buf;
    635     len = pbuf.len;
    636 
    637     assert(len >= 0);
    638 
    639     if (len > PY_SSIZE_T_MAX / 2 - 2) {
    640         PyBuffer_Release(&pbuf);
    641         return PyErr_NoMemory();
    642     }
    643 
    644     /* Worst case: output is twice as big as input (fixed later) */
    645     if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
    646         PyBuffer_Release(&pbuf);
    647         return NULL;
    648     }
    649     out_data = (unsigned char *)PyString_AS_STRING(rv);
    650 
    651     for( in=0; in<len; in++) {
    652         ch = in_data[in];
    653         if ( ch == RUNCHAR ) {
    654             /* RUNCHAR. Escape it. */
    655             *out_data++ = RUNCHAR;
    656             *out_data++ = 0;
    657         } else {
    658             /* Check how many following are the same */
    659             for(inend=in+1;
    660                 inend<len && in_data[inend] == ch &&
    661                     inend < in+255;
    662                 inend++) ;
    663             if ( inend - in > 3 ) {
    664                 /* More than 3 in a row. Output RLE. */
    665                 *out_data++ = ch;
    666                 *out_data++ = RUNCHAR;
    667                 *out_data++ = inend-in;
    668                 in = inend-1;
    669             } else {
    670                 /* Less than 3. Output the byte itself */
    671                 *out_data++ = ch;
    672             }
    673         }
    674     }
    675     if (_PyString_Resize(&rv,
    676                        (out_data -
    677                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
    678         Py_DECREF(rv);
    679         rv = NULL;
    680     }
    681     PyBuffer_Release(&pbuf);
    682     return rv;
    683 }
    684 
    685 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
    686 
    687 static PyObject *
    688 binascii_b2a_hqx(PyObject *self, PyObject *args)
    689 {
    690     Py_buffer pbin;
    691     unsigned char *ascii_data, *bin_data;
    692     int leftbits = 0;
    693     unsigned char this_ch;
    694     unsigned int leftchar = 0;
    695     PyObject *rv;
    696     Py_ssize_t len;
    697 
    698     if ( !PyArg_ParseTuple(args, "s*:b2a_hqx", &pbin) )
    699         return NULL;
    700     bin_data = pbin.buf;
    701     len = pbin.len;
    702 
    703     assert(len >= 0);
    704 
    705     if (len > PY_SSIZE_T_MAX / 2 - 2) {
    706         PyBuffer_Release(&pbin);
    707         return PyErr_NoMemory();
    708     }
    709 
    710     /* Allocate a buffer that is at least large enough */
    711     if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
    712         PyBuffer_Release(&pbin);
    713         return NULL;
    714     }
    715     ascii_data = (unsigned char *)PyString_AS_STRING(rv);
    716 
    717     for( ; len > 0 ; len--, bin_data++ ) {
    718         /* Shift into our buffer, and output any 6bits ready */
    719         leftchar = (leftchar << 8) | *bin_data;
    720         leftbits += 8;
    721         while ( leftbits >= 6 ) {
    722             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
    723             leftbits -= 6;
    724             *ascii_data++ = table_b2a_hqx[this_ch];
    725         }
    726     }
    727     /* Output a possible runt byte */
    728     if ( leftbits ) {
    729         leftchar <<= (6-leftbits);
    730         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
    731     }
    732     if (_PyString_Resize(&rv,
    733                        (ascii_data -
    734                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
    735         Py_DECREF(rv);
    736         rv = NULL;
    737     }
    738     PyBuffer_Release(&pbin);
    739     return rv;
    740 }
    741 
    742 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
    743 
    744 static PyObject *
    745 binascii_rledecode_hqx(PyObject *self, PyObject *args)
    746 {
    747     Py_buffer pin;
    748     unsigned char *in_data, *out_data;
    749     unsigned char in_byte, in_repeat;
    750     PyObject *rv;
    751     Py_ssize_t in_len, out_len, out_len_left;
    752 
    753     if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
    754         return NULL;
    755     in_data = pin.buf;
    756     in_len = pin.len;
    757 
    758     assert(in_len >= 0);
    759 
    760     /* Empty string is a special case */
    761     if ( in_len == 0 ) {
    762         PyBuffer_Release(&pin);
    763         return PyString_FromStringAndSize("", 0);
    764     }
    765     else if (in_len > PY_SSIZE_T_MAX / 2) {
    766         PyBuffer_Release(&pin);
    767         return PyErr_NoMemory();
    768     }
    769 
    770     /* Allocate a buffer of reasonable size. Resized when needed */
    771     out_len = in_len*2;
    772     if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) {
    773         PyBuffer_Release(&pin);
    774         return NULL;
    775     }
    776     out_len_left = out_len;
    777     out_data = (unsigned char *)PyString_AS_STRING(rv);
    778 
    779     /*
    780     ** We need two macros here to get/put bytes and handle
    781     ** end-of-buffer for input and output strings.
    782     */
    783 #define INBYTE(b) \
    784     do { \
    785              if ( --in_len < 0 ) { \
    786                        PyErr_SetString(Incomplete, ""); \
    787                        Py_DECREF(rv); \
    788                        PyBuffer_Release(&pin); \
    789                        return NULL; \
    790              } \
    791              b = *in_data++; \
    792     } while(0)
    793 
    794 #define OUTBYTE(b) \
    795     do { \
    796              if ( --out_len_left < 0 ) { \
    797                       if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
    798                       if (_PyString_Resize(&rv, 2*out_len) < 0) \
    799                         { Py_DECREF(rv); PyBuffer_Release(&pin); return NULL; } \
    800                       out_data = (unsigned char *)PyString_AS_STRING(rv) \
    801                                                              + out_len; \
    802                       out_len_left = out_len-1; \
    803                       out_len = out_len * 2; \
    804              } \
    805              *out_data++ = b; \
    806     } while(0)
    807 
    808         /*
    809         ** Handle first byte separately (since we have to get angry
    810         ** in case of an orphaned RLE code).
    811         */
    812         INBYTE(in_byte);
    813 
    814     if (in_byte == RUNCHAR) {
    815         INBYTE(in_repeat);
    816         if (in_repeat != 0) {
    817             /* Note Error, not Incomplete (which is at the end
    818             ** of the string only). This is a programmer error.
    819             */
    820             PyErr_SetString(Error, "Orphaned RLE code at start");
    821             PyBuffer_Release(&pin);
    822             Py_DECREF(rv);
    823             return NULL;
    824         }
    825         OUTBYTE(RUNCHAR);
    826     } else {
    827         OUTBYTE(in_byte);
    828     }
    829 
    830     while( in_len > 0 ) {
    831         INBYTE(in_byte);
    832 
    833         if (in_byte == RUNCHAR) {
    834             INBYTE(in_repeat);
    835             if ( in_repeat == 0 ) {
    836                 /* Just an escaped RUNCHAR value */
    837                 OUTBYTE(RUNCHAR);
    838             } else {
    839                 /* Pick up value and output a sequence of it */
    840                 in_byte = out_data[-1];
    841                 while ( --in_repeat > 0 )
    842                     OUTBYTE(in_byte);
    843             }
    844         } else {
    845             /* Normal byte */
    846             OUTBYTE(in_byte);
    847         }
    848     }
    849     if (_PyString_Resize(&rv,
    850                        (out_data -
    851                         (unsigned char *)PyString_AS_STRING(rv))) < 0) {
    852         Py_DECREF(rv);
    853         rv = NULL;
    854     }
    855     PyBuffer_Release(&pin);
    856     return rv;
    857 }
    858 
    859 PyDoc_STRVAR(doc_crc_hqx,
    860 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
    861 
    862 static PyObject *
    863 binascii_crc_hqx(PyObject *self, PyObject *args)
    864 {
    865     Py_buffer pin;
    866     unsigned char *bin_data;
    867     unsigned int crc;
    868     Py_ssize_t len;
    869 
    870     if ( !PyArg_ParseTuple(args, "s*i:crc_hqx", &pin, &crc) )
    871         return NULL;
    872     bin_data = pin.buf;
    873     len = pin.len;
    874 
    875     while(len-- > 0) {
    876         crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
    877     }
    878 
    879     PyBuffer_Release(&pin);
    880     return Py_BuildValue("i", crc);
    881 }
    882 
    883 PyDoc_STRVAR(doc_crc32,
    884 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
    885 
    886 #ifdef USE_ZLIB_CRC32
    887 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
    888 static PyObject *
    889 binascii_crc32(PyObject *self, PyObject *args)
    890 {
    891     unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
    892     Py_buffer pbuf;
    893     Byte *buf;
    894     Py_ssize_t len;
    895     int signed_val;
    896 
    897     if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val))
    898     return NULL;
    899     /* In Python 2.x we return a signed integer regardless of native platform
    900      * long size (the 32bit unsigned long is treated as 32-bit signed and sign
    901      * extended into a 64-bit long inside the integer object).  3.0 does the
    902      * right thing and returns unsigned. http://bugs.python.org/issue1202 */
    903     buf = (Byte*)pbuf.buf;
    904     len = pbuf.len;
    905     signed_val = crc32(crc32val, buf, len);
    906     PyBuffer_Release(&pbuf);
    907     return PyInt_FromLong(signed_val);
    908 }
    909 #else  /* USE_ZLIB_CRC32 */
    910 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
    911     Also known as: ISO 3307
    912 **********************************************************************|
    913 *                                                                    *|
    914 * Demonstration program to compute the 32-bit CRC used as the frame  *|
    915 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
    916 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
    917 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
    918 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
    919 * this polynomial is or will be included in CCITT V.41, which        *|
    920 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
    921 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
    922 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
    923 *                                                                    *|
    924 **********************************************************************|
    925 
    926  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
    927  code or tables extracted from it, as desired without restriction.
    928 
    929  First, the polynomial itself and its table of feedback terms.  The
    930  polynomial is
    931  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
    932  Note that we take it "backwards" and put the highest-order term in
    933  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
    934  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
    935  the MSB being 1.
    936 
    937  Note that the usual hardware shift register implementation, which
    938  is what we're using (we're merely optimizing it by doing eight-bit
    939  chunks at a time) shifts bits into the lowest-order term.  In our
    940  implementation, that means shifting towards the right.  Why do we
    941  do it this way?  Because the calculated CRC must be transmitted in
    942  order from highest-order term to lowest-order term.  UARTs transmit
    943  characters in order from LSB to MSB.  By storing the CRC this way,
    944  we hand it to the UART in the order low-byte to high-byte; the UART
     945  sends each low-bit to high-bit; and the result is transmission bit
    946  by bit from highest- to lowest-order term without requiring any bit
    947  shuffling on our part.  Reception works similarly.
    948 
    949  The feedback terms table consists of 256, 32-bit entries.  Notes:
    950 
    951   1. The table can be generated at runtime if desired; code to do so
    952      is shown later.  It might not be obvious, but the feedback
    953      terms simply represent the results of eight shift/xor opera-
    954      tions for all combinations of data and CRC register values.
    955 
    956   2. The CRC accumulation logic is the same for all CRC polynomials,
    957      be they sixteen or thirty-two bits wide.  You simply choose the
    958      appropriate table.  Alternatively, because the table can be
    959      generated at runtime, you can start by generating the table for
    960      the polynomial in question and use exactly the same "updcrc",
    961      if your application needn't simultaneously handle two CRC
    962      polynomials.  (Note, however, that XMODEM is strange.)
    963 
    964   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
    965      of course, 32-bit entries work OK if the high 16 bits are zero.
    966 
    967   4. The values must be right-shifted by eight bits by the "updcrc"
    968      logic; the shift must be unsigned (bring in zeroes).  On some
    969      hardware you could probably optimize the shift in assembler by
    970      using byte-swap instructions.
    971 ********************************************************************/
    972 
    /*
     * CRC-32 feedback table for the reflected polynomial 0xedb88320: entry i
     * is the result of eight shift/xor steps applied to the byte value i.
     * The table is only ever read, so it is declared const.
     */
    static const unsigned int crc_32_tab[256] = {
        0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
        0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
        0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
        0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
        0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
        0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
        0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
        0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
        0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
        0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
        0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
        0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
        0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
        0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
        0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
        0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
        0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
        0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
        0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
        0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
        0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
        0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
        0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
        0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
        0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
        0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
        0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
        0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
        0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
        0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
        0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
        0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
        0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
        0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
        0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
        0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
        0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
        0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
        0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
        0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
        0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
        0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
        0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
        0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
        0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
        0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
        0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
        0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
        0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
        0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
        0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
        0x2d02ef8dU
    };
   1027 
   1028 static PyObject *
   1029 binascii_crc32(PyObject *self, PyObject *args)
   1030 { /* By Jim Ahlstrom; All rights transferred to CNRI */
   1031     Py_buffer pbin;
   1032     unsigned char *bin_data;
   1033     unsigned int crc = 0U;      /* initial value of CRC */
   1034     Py_ssize_t len;
   1035     int result;
   1036 
   1037     if ( !PyArg_ParseTuple(args, "s*|I:crc32", &pbin, &crc) )
   1038         return NULL;
   1039     bin_data = pbin.buf;
   1040     len = pbin.len;
   1041 
   1042     crc = ~ crc;
   1043     while (len-- > 0)
   1044         crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
   1045         /* Note:  (crc >> 8) MUST zero fill on left */
   1046 
   1047     result = (int)(crc ^ 0xFFFFFFFFU);
   1048     PyBuffer_Release(&pbin);
   1049     return PyInt_FromLong(result);
   1050 }
   1051 #endif  /* USE_ZLIB_CRC32 */
   1052 
   1053 
   1054 static PyObject *
   1055 binascii_hexlify(PyObject *self, PyObject *args)
   1056 {
   1057     Py_buffer parg;
   1058     char* argbuf;
   1059     Py_ssize_t arglen;
   1060     PyObject *retval;
   1061     char* retbuf;
   1062     Py_ssize_t i, j;
   1063 
   1064     if (!PyArg_ParseTuple(args, "s*:b2a_hex", &parg))
   1065         return NULL;
   1066     argbuf = parg.buf;
   1067     arglen = parg.len;
   1068 
   1069     assert(arglen >= 0);
   1070     if (arglen > PY_SSIZE_T_MAX / 2) {
   1071         PyBuffer_Release(&parg);
   1072         return PyErr_NoMemory();
   1073     }
   1074 
   1075     retval = PyString_FromStringAndSize(NULL, arglen*2);
   1076     if (!retval) {
   1077         PyBuffer_Release(&parg);
   1078         return NULL;
   1079     }
   1080     retbuf = PyString_AS_STRING(retval);
   1081 
   1082     /* make hex version of string, taken from shamodule.c */
   1083     for (i=j=0; i < arglen; i++) {
   1084         char c;
   1085         c = (argbuf[i] >> 4) & 0xf;
   1086         c = (c>9) ? c+'a'-10 : c + '0';
   1087         retbuf[j++] = c;
   1088         c = argbuf[i] & 0xf;
   1089         c = (c>9) ? c+'a'-10 : c + '0';
   1090         retbuf[j++] = c;
   1091     }
   1092     PyBuffer_Release(&parg);
   1093     return retval;
   1094 }
   1095 
   1096 PyDoc_STRVAR(doc_hexlify,
   1097 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
   1098 \n\
   1099 This function is also available as \"hexlify()\".");
   1100 
   1101 
   /*
    * Return the value (0..15) of the hexadecimal digit `c`, accepting both
    * upper- and lower-case letters, or -1 when `c` is not a hex digit.
    *
    * Plain range comparisons are used instead of <ctype.h> so the result does
    * not depend on the current locale and any int argument is safe to pass.
    */
   static int
   to_int(int c)
   {
       if (c >= '0' && c <= '9')
           return c - '0';
       if (c >= 'a' && c <= 'f')
           return c - 'a' + 10;
       if (c >= 'A' && c <= 'F')
           return c - 'A' + 10;
       return -1;
   }
   1115 
   1116 
   1117 static PyObject *
   1118 binascii_unhexlify(PyObject *self, PyObject *args)
   1119 {
   1120     Py_buffer parg;
   1121     char* argbuf;
   1122     Py_ssize_t arglen;
   1123     PyObject *retval;
   1124     char* retbuf;
   1125     Py_ssize_t i, j;
   1126 
   1127     if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
   1128         return NULL;
   1129     argbuf = parg.buf;
   1130     arglen = parg.len;
   1131 
   1132     assert(arglen >= 0);
   1133 
   1134     /* XXX What should we do about strings with an odd length?  Should
   1135      * we add an implicit leading zero, or a trailing zero?  For now,
   1136      * raise an exception.
   1137      */
   1138     if (arglen % 2) {
   1139         PyBuffer_Release(&parg);
   1140         PyErr_SetString(PyExc_TypeError, "Odd-length string");
   1141         return NULL;
   1142     }
   1143 
   1144     retval = PyString_FromStringAndSize(NULL, (arglen/2));
   1145     if (!retval) {
   1146         PyBuffer_Release(&parg);
   1147         return NULL;
   1148     }
   1149     retbuf = PyString_AS_STRING(retval);
   1150 
   1151     for (i=j=0; i < arglen; i += 2) {
   1152         int top = to_int(Py_CHARMASK(argbuf[i]));
   1153         int bot = to_int(Py_CHARMASK(argbuf[i+1]));
   1154         if (top == -1 || bot == -1) {
   1155             PyErr_SetString(PyExc_TypeError,
   1156                             "Non-hexadecimal digit found");
   1157             goto finally;
   1158         }
   1159         retbuf[j++] = (top << 4) + bot;
   1160     }
   1161     PyBuffer_Release(&parg);
   1162     return retval;
   1163 
   1164   finally:
   1165     PyBuffer_Release(&parg);
   1166     Py_DECREF(retval);
   1167     return NULL;
   1168 }
   1169 
   1170 PyDoc_STRVAR(doc_unhexlify,
   1171 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
   1172 \n\
   1173 hexstr must contain an even number of hex digits (upper or lower case).\n\
   1174 This function is also available as \"unhexlify()\"");
   1175 
   /*
    * Value of each 7-bit character code as a hexadecimal digit, or -1 when
    * the character is not one.  Indexed through the hexval() macro; callers
    * must only pass codes below 128.  Read-only, hence const.
    */
   static const int table_hex[128] = {
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,   /* 0x00 - 0x0f */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,   /* 0x10 - 0x1f */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,   /* 0x20 - 0x2f */
      0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,   /* '0' - '9'   */
     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,   /* 'A' - 'F'   */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,   /* 0x50 - 0x5f */
     -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,   /* 'a' - 'f'   */
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1    /* 0x70 - 0x7f */
   };
   1186 
   1187 #define hexval(c) table_hex[(unsigned int)(c)]
   1188 
   1189 #define MAXLINESIZE 76
   1190 
   1191 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
   1192 
   1193 static PyObject*
   1194 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
   1195 {
   1196     Py_ssize_t in, out;
   1197     char ch;
   1198     Py_buffer pdata;
   1199     unsigned char *data, *odata;
   1200     Py_ssize_t datalen = 0;
   1201     PyObject *rv;
   1202     static char *kwlist[] = {"data", "header", NULL};
   1203     int header = 0;
   1204 
   1205     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
   1206           &header))
   1207         return NULL;
   1208     data = pdata.buf;
   1209     datalen = pdata.len;
   1210 
   1211     /* We allocate the output same size as input, this is overkill.
   1212      * The previous implementation used calloc() so we'll zero out the
   1213      * memory here too, since PyMem_Malloc() does not guarantee that.
   1214      */
   1215     odata = (unsigned char *) PyMem_Malloc(datalen);
   1216     if (odata == NULL) {
   1217         PyBuffer_Release(&pdata);
   1218         PyErr_NoMemory();
   1219         return NULL;
   1220     }
   1221     memset(odata, 0, datalen);
   1222 
   1223     in = out = 0;
   1224     while (in < datalen) {
   1225         if (data[in] == '=') {
   1226             in++;
   1227             if (in >= datalen) break;
   1228             /* Soft line breaks */
   1229             if ((data[in] == '\n') || (data[in] == '\r')) {
   1230                 if (data[in] != '\n') {
   1231                     while (in < datalen && data[in] != '\n') in++;
   1232                 }
   1233                 if (in < datalen) in++;
   1234             }
   1235             else if (data[in] == '=') {
   1236                 /* broken case from broken python qp */
   1237                 odata[out++] = '=';
   1238                 in++;
   1239             }
   1240             else if (((data[in] >= 'A' && data[in] <= 'F') ||
   1241                       (data[in] >= 'a' && data[in] <= 'f') ||
   1242                       (data[in] >= '0' && data[in] <= '9')) &&
   1243                      ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
   1244                       (data[in+1] >= 'a' && data[in+1] <= 'f') ||
   1245                       (data[in+1] >= '0' && data[in+1] <= '9'))) {
   1246                 /* hexval */
   1247                 ch = hexval(data[in]) << 4;
   1248                 in++;
   1249                 ch |= hexval(data[in]);
   1250                 in++;
   1251                 odata[out++] = ch;
   1252             }
   1253             else {
   1254               odata[out++] = '=';
   1255             }
   1256         }
   1257         else if (header && data[in] == '_') {
   1258             odata[out++] = ' ';
   1259             in++;
   1260         }
   1261         else {
   1262             odata[out] = data[in];
   1263             in++;
   1264             out++;
   1265         }
   1266     }
   1267     if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
   1268         PyBuffer_Release(&pdata);
   1269         PyMem_Free(odata);
   1270         return NULL;
   1271     }
   1272     PyBuffer_Release(&pdata);
   1273     PyMem_Free(odata);
   1274     return rv;
   1275 }
   1276 
   /*
    * Write the two uppercase hexadecimal digits of `ch` to s[0] (high nibble)
    * and s[1] (low nibble).  No terminator is written.  Always returns 0.
    */
   static int
   to_hex (unsigned char ch, unsigned char *s)
   {
       static const char digits[] = "0123456789ABCDEF";

       s[0] = digits[(ch >> 4) & 0xf];
       s[1] = digits[ch & 0xf];
       return 0;
   }
   1287 
   1288 PyDoc_STRVAR(doc_b2a_qp,
   1289 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
   1290  Encode a string using quoted-printable encoding. \n\
   1291 \n\
   1292 On encoding, when istext is set, newlines are not encoded, and white \n\
   1293 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
   1294 both encoded.  When quotetabs is set, space and tabs are encoded.");
   1295 
   1296 /* XXX: This is ridiculously complicated to be backward compatible
   1297  * (mostly) with the quopri module.  It doesn't re-create the quopri
   1298  * module bug where text ending in CRLF has the CR encoded */
   1299 static PyObject*
   1300 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
   1301 {
   1302     Py_ssize_t in, out;
   1303     Py_buffer pdata;
   1304     unsigned char *data, *odata;
   1305     Py_ssize_t datalen = 0, odatalen = 0;
   1306     PyObject *rv;
   1307     unsigned int linelen = 0;
   1308     static char *kwlist[] = {"data", "quotetabs", "istext",
   1309                                    "header", NULL};
   1310     int istext = 1;
   1311     int quotetabs = 0;
   1312     int header = 0;
   1313     unsigned char ch;
   1314     int crlf = 0;
   1315     unsigned char *p;
   1316 
   1317     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|iii", kwlist, &pdata,
   1318           &quotetabs, &istext, &header))
   1319         return NULL;
   1320     data = pdata.buf;
   1321     datalen = pdata.len;
   1322 
   1323     /* See if this string is using CRLF line ends */
   1324     /* XXX: this function has the side effect of converting all of
   1325      * the end of lines to be the same depending on this detection
   1326      * here */
   1327     p = (unsigned char *) memchr(data, '\n', datalen);
   1328     if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
   1329         crlf = 1;
   1330 
   1331     /* First, scan to see how many characters need to be encoded */
   1332     in = 0;
   1333     while (in < datalen) {
   1334         if ((data[in] > 126) ||
   1335             (data[in] == '=') ||
   1336             (header && data[in] == '_') ||
   1337             ((data[in] == '.') && (linelen == 0) &&
   1338              (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
   1339             (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
   1340             ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
   1341             ((data[in] < 33) &&
   1342              (data[in] != '\r') && (data[in] != '\n') &&
   1343              (quotetabs ||
   1344             (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
   1345         {
   1346             if ((linelen + 3) >= MAXLINESIZE) {
   1347                 linelen = 0;
   1348                 if (crlf)
   1349                     odatalen += 3;
   1350                 else
   1351                     odatalen += 2;
   1352             }
   1353             linelen += 3;
   1354             odatalen += 3;
   1355             in++;
   1356         }
   1357         else {
   1358             if (istext &&
   1359                 ((data[in] == '\n') ||
   1360                  ((in+1 < datalen) && (data[in] == '\r') &&
   1361                  (data[in+1] == '\n'))))
   1362             {
   1363                 linelen = 0;
   1364                 /* Protect against whitespace on end of line */
   1365                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
   1366                     odatalen += 2;
   1367                 if (crlf)
   1368                     odatalen += 2;
   1369                 else
   1370                     odatalen += 1;
   1371                 if (data[in] == '\r')
   1372                     in += 2;
   1373                 else
   1374                     in++;
   1375             }
   1376             else {
   1377                 if ((in + 1 != datalen) &&
   1378                     (data[in+1] != '\n') &&
   1379                     (linelen + 1) >= MAXLINESIZE) {
   1380                     linelen = 0;
   1381                     if (crlf)
   1382                         odatalen += 3;
   1383                     else
   1384                         odatalen += 2;
   1385                 }
   1386                 linelen++;
   1387                 odatalen++;
   1388                 in++;
   1389             }
   1390         }
   1391     }
   1392 
   1393     /* We allocate the output same size as input, this is overkill.
   1394      * The previous implementation used calloc() so we'll zero out the
   1395      * memory here too, since PyMem_Malloc() does not guarantee that.
   1396      */
   1397     odata = (unsigned char *) PyMem_Malloc(odatalen);
   1398     if (odata == NULL) {
   1399         PyBuffer_Release(&pdata);
   1400         PyErr_NoMemory();
   1401         return NULL;
   1402     }
   1403     memset(odata, 0, odatalen);
   1404 
   1405     in = out = linelen = 0;
   1406     while (in < datalen) {
   1407         if ((data[in] > 126) ||
   1408             (data[in] == '=') ||
   1409             (header && data[in] == '_') ||
   1410             ((data[in] == '.') && (linelen == 0) &&
   1411              (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
   1412             (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
   1413             ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
   1414             ((data[in] < 33) &&
   1415              (data[in] != '\r') && (data[in] != '\n') &&
   1416              (quotetabs ||
   1417             (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
   1418         {
   1419             if ((linelen + 3 )>= MAXLINESIZE) {
   1420                 odata[out++] = '=';
   1421                 if (crlf) odata[out++] = '\r';
   1422                 odata[out++] = '\n';
   1423                 linelen = 0;
   1424             }
   1425             odata[out++] = '=';
   1426             to_hex(data[in], &odata[out]);
   1427             out += 2;
   1428             in++;
   1429             linelen += 3;
   1430         }
   1431         else {
   1432             if (istext &&
   1433                 ((data[in] == '\n') ||
   1434                  ((in+1 < datalen) && (data[in] == '\r') &&
   1435                  (data[in+1] == '\n'))))
   1436             {
   1437                 linelen = 0;
   1438                 /* Protect against whitespace on end of line */
   1439                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
   1440                     ch = odata[out-1];
   1441                     odata[out-1] = '=';
   1442                     to_hex(ch, &odata[out]);
   1443                     out += 2;
   1444                 }
   1445 
   1446                 if (crlf) odata[out++] = '\r';
   1447                 odata[out++] = '\n';
   1448                 if (data[in] == '\r')
   1449                     in += 2;
   1450                 else
   1451                     in++;
   1452             }
   1453             else {
   1454                 if ((in + 1 != datalen) &&
   1455                     (data[in+1] != '\n') &&
   1456                     (linelen + 1) >= MAXLINESIZE) {
   1457                     odata[out++] = '=';
   1458                     if (crlf) odata[out++] = '\r';
   1459                     odata[out++] = '\n';
   1460                     linelen = 0;
   1461                 }
   1462                 linelen++;
   1463                 if (header && data[in] == ' ') {
   1464                     odata[out++] = '_';
   1465                     in++;
   1466                 }
   1467                 else {
   1468                     odata[out++] = data[in++];
   1469                 }
   1470             }
   1471         }
   1472     }
   1473     if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
   1474         PyBuffer_Release(&pdata);
   1475         PyMem_Free(odata);
   1476         return NULL;
   1477     }
   1478     PyBuffer_Release(&pdata);
   1479     PyMem_Free(odata);
   1480     return rv;
   1481 }
   1482 
   1483 /* List of functions defined in the module */
   1484 
   1485 static struct PyMethodDef binascii_module_methods[] = {
   1486     {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
   1487     {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
   1488     {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
   1489     {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
   1490     {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
   1491     {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
   1492     {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
   1493     {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
   1494     {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
   1495     {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
   1496     {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
   1497     {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
   1498      doc_rledecode_hqx},
   1499     {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
   1500     {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
   1501     {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
   1502       doc_a2b_qp},
   1503     {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
   1504       doc_b2a_qp},
   1505     {NULL, NULL}                             /* sentinel */
   1506 };
   1507 
   1508 
   1509 /* Initialization function for the module (*must* be called initbinascii) */
   1510 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
   1511 
   1512 PyMODINIT_FUNC
   1513 initbinascii(void)
   1514 {
   1515     PyObject *m, *d, *x;
   1516 
   1517     /* Create the module and add the functions */
   1518     m = Py_InitModule("binascii", binascii_module_methods);
   1519     if (m == NULL)
   1520         return;
   1521 
   1522     d = PyModule_GetDict(m);
   1523     x = PyString_FromString(doc_binascii);
   1524     PyDict_SetItemString(d, "__doc__", x);
   1525     Py_XDECREF(x);
   1526 
   1527     Error = PyErr_NewException("binascii.Error", NULL, NULL);
   1528     PyDict_SetItemString(d, "Error", Error);
   1529     Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
   1530     PyDict_SetItemString(d, "Incomplete", Incomplete);
   1531 }
   1532