Home | History | Annotate | Download | only in Modules
      1 /*
      2 ** Routines to represent binary data in ASCII and vice-versa
      3 **
      4 ** This module currently supports the following encodings:
      5 ** uuencode:
      6 **      each line encodes 45 bytes (except possibly the last)
      7 **      First char encodes (binary) length, rest data
      8 **      each char encodes 6 bits, as follows:
      9 **      binary: 01234567 abcdefgh ijklmnop
     10 **      ascii:  012345 67abcd efghij klmnop
     11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
     12 **      short binary data is zero-extended (so the bits are always in the
     13 **      right place), this does *not* reflect in the length.
     14 ** base64:
     15 **      Line breaks are insignificant, but lines are at most 76 chars
     16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
     17 **      is done via a table.
     18 **      Short binary data is filled (in ASCII) with '='.
     19 ** hqx:
     20 **      File starts with introductory text, real data starts and ends
     21 **      with colons.
     22 **      Data consists of three similar parts: info, datafork, resourcefork.
     23 **      Each part is protected (at the end) with a 16-bit crc
     24 **      The binary data is run-length encoded, and then ascii-fied:
     25 **      binary: 01234567 abcdefgh ijklmnop
     26 **      ascii:  012345 67abcd efghij klmnop
     27 **      ASCII encoding is table-driven, see the code.
     28 **      Short binary data results in the runt ascii-byte being output with
     29 **      the bits in the right place.
     30 **
     31 ** While I was reading dozens of programs that encode or decode the formats
     32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
     33 **
     34 **      Programs that encode binary data in ASCII are written in
     35 **      such a style that they are as unreadable as possible. Devices used
     36 **      include unnecessary global variables, burying important tables
     37 **      in unrelated sourcefiles, putting functions in include files,
     38 **      using seemingly-descriptive variable names for different purposes,
     39 **      calls to empty subroutines and a host of others.
     40 **
     41 ** I have attempted to break with this tradition, but I guess that that
     42 ** does make the performance sub-optimal. Oh well, too bad...
     43 **
     44 ** Jack Jansen, CWI, July 1995.
     45 **
     46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
     47 ** quoted-printable encoding specifies that non printable characters (anything
     48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
     49 ** of the character.  It also specifies some other behavior to enable 8bit data
     50 ** in a mail message with little difficulty (maximum line sizes, protecting
     51 ** some cases of whitespace, etc).
     52 **
     53 ** Brandon Long, September 2001.
     54 */
     55 
     56 #define PY_SSIZE_T_CLEAN
     57 
     58 #include "Python.h"
     59 #include "pystrhex.h"
     60 #ifdef USE_ZLIB_CRC32
     61 #include "zlib.h"
     62 #endif
     63 
     64 static PyObject *Error;
     65 static PyObject *Incomplete;
     66 
     67 /*
     68 ** hqx lookup table, ascii->binary.
     69 */
     70 
     /* Marker byte of the hqx run-length coding.  rlecode_hqx emits a literal
        0x90 as RUNCHAR followed by a 0 count, and a run as <byte> RUNCHAR <count>. */
     #define RUNCHAR 0x90

     /* Sentinel entries of table_a2b_hqx.  They lie outside the 6-bit value
        range 0x00..0x3F, so they cannot collide with a decoded value.
        a2b_hqx stops at DONE (the terminating ':'), silently skips SKIP
        ('\n' and '\r') and raises Error("Illegal char") on FAIL. */
     #define DONE 0x7F
     #define SKIP 0x7E
     #define FAIL 0x7D

     /* Indexed by the input byte (0..255); yields either the 6-bit value that
        the character stands for or one of the sentinels above. */
     static const unsigned char table_a2b_hqx[256] = {
     /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
     /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
     /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
     /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
     /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
     /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     /*              !     "     #     $     %     &     '   */
     /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
     /*        (     )     *     +     ,     -     .     /   */
     /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
     /*        0     1     2     3     4     5     6     7   */
     /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
     /*        8     9     :     ;     <     =     >     ?   */
     /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
     /*        @     A     B     C     D     E     F     G   */
     /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
     /*        H     I     J     K     L     M     N     O   */
     /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
     /*        P     Q     R     S     T     U     V     W   */
     /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
     /*        X     Y     Z     [     \     ]     ^     _   */
     /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
     /*        `     a     b     c     d     e     f     g   */
     /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
     /*        h     i     j     k     l     m     n     o   */
     /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
     /*        p     q     r     s     t     u     v     w   */
     /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
     /*        x     y     z     {     |     }     ~    ^?   */
     /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     /* Everything from 0x80 upwards is invalid. */
     /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
     };
    127 
    /* hqx encoding alphabet: b2a_hqx indexes this string with a 6-bit value
       (0..63) to obtain the ASCII character to emit. */
    static const unsigned char table_b2a_hqx[] =
    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
    130 
    /* base64 decoding table covering the 128 7-bit ASCII codes only, so
       callers must reject or mask bytes above 0x7f before indexing.
       An entry is the 6-bit value of the character, or -1 when the character
       is not part of the alphabet; the users in this file store the entry in
       an unsigned char and compare it against (unsigned char)-1. */
    static const char table_a2b_base64[] = {
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
        52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
        -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
        15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
        -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
        41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
    };
    141 
    /* Character used to pad the final base64 quad. */
    #define BASE64_PAD '='

    /* Max binary chunk size; limited only by available memory.
       b2a_base64 allocates bin_len*2 + 2 bytes (+1 for the optional newline),
       so this bound keeps that size computation within Py_ssize_t. */
    #define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

    /* base64 encoding alphabet: b2a_base64 indexes this string with a 6-bit
       value (0..63) to obtain the ASCII character to emit. */
    static const unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    149 
    150 
    151 
    /* Lookup table of 256 16-bit values, one per possible byte value.
       NOTE(review): presumably the byte-wise table for the 16-bit CRC that
       protects each hqx part (see the file header); the routine that
       consumes it is not visible in this part of the file -- confirm there. */
    static const unsigned short crctab_hqx[256] = {
        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
        0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
        0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
        0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
        0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
        0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
        0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
        0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
        0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
        0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
        0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
        0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
        0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
        0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
        0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
        0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
        0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
        0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
        0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
        0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
        0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
        0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
        0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
        0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
        0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
        0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
        0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
        0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
        0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
        0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
        0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
        0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
    };
    186 
    187 /*[clinic input]
    188 module binascii
    189 [clinic start generated code]*/
    190 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
    191 
    192 /*[python input]
    193 
    194 class ascii_buffer_converter(CConverter):
    195     type = 'Py_buffer'
    196     converter = 'ascii_buffer_converter'
    197     impl_by_reference = True
    198     c_default = "{NULL, NULL}"
    199 
    200     def cleanup(self):
    201         name = self.name
    202         return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
    203 
    204 [python start generated code]*/
    205 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
    206 
    207 static int
    208 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
    209 {
    210     if (arg == NULL) {
    211         PyBuffer_Release(buf);
    212         return 1;
    213     }
    214     if (PyUnicode_Check(arg)) {
    215         if (PyUnicode_READY(arg) < 0)
    216             return 0;
    217         if (!PyUnicode_IS_ASCII(arg)) {
    218             PyErr_SetString(PyExc_ValueError,
    219                             "string argument should contain only ASCII characters");
    220             return 0;
    221         }
    222         assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
    223         buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
    224         buf->len = PyUnicode_GET_LENGTH(arg);
    225         buf->obj = NULL;
    226         return 1;
    227     }
    228     if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
    229         PyErr_Format(PyExc_TypeError,
    230                      "argument should be bytes, buffer or ASCII string, "
    231                      "not '%.100s'", Py_TYPE(arg)->tp_name);
    232         return 0;
    233     }
    234     if (!PyBuffer_IsContiguous(buf, 'C')) {
    235         PyErr_Format(PyExc_TypeError,
    236                      "argument should be a contiguous buffer, "
    237                      "not '%.100s'", Py_TYPE(arg)->tp_name);
    238         PyBuffer_Release(buf);
    239         return 0;
    240     }
    241     return Py_CLEANUP_SUPPORTED;
    242 }
    243 
    244 #include "clinic/binascii.c.h"
    245 
    246 /*[clinic input]
    247 binascii.a2b_uu
    248 
    249     data: ascii_buffer
    250     /
    251 
    252 Decode a line of uuencoded data.
    253 [clinic start generated code]*/
    254 
    255 static PyObject *
    256 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
    257 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
    258 {
    259     const unsigned char *ascii_data;
    260     unsigned char *bin_data;
    261     int leftbits = 0;
    262     unsigned char this_ch;
    263     unsigned int leftchar = 0;
    264     PyObject *rv;
    265     Py_ssize_t ascii_len, bin_len;
    266 
    267     ascii_data = data->buf;
    268     ascii_len = data->len;
    269 
    270     assert(ascii_len >= 0);
    271 
    272     /* First byte: binary data length (in bytes) */
    273     bin_len = (*ascii_data++ - ' ') & 077;
    274     ascii_len--;
    275 
    276     /* Allocate the buffer */
    277     if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
    278         return NULL;
    279     bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
    280 
    281     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
    282         /* XXX is it really best to add NULs if there's no more data */
    283         this_ch = (ascii_len > 0) ? *ascii_data : 0;
    284         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
    285             /*
    286             ** Whitespace. Assume some spaces got eaten at
    287             ** end-of-line. (We check this later)
    288             */
    289             this_ch = 0;
    290         } else {
    291             /* Check the character for legality
    292             ** The 64 in stead of the expected 63 is because
    293             ** there are a few uuencodes out there that use
    294             ** '`' as zero instead of space.
    295             */
    296             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
    297                 PyErr_SetString(Error, "Illegal char");
    298                 Py_DECREF(rv);
    299                 return NULL;
    300             }
    301             this_ch = (this_ch - ' ') & 077;
    302         }
    303         /*
    304         ** Shift it in on the low end, and see if there's
    305         ** a byte ready for output.
    306         */
    307         leftchar = (leftchar << 6) | (this_ch);
    308         leftbits += 6;
    309         if ( leftbits >= 8 ) {
    310             leftbits -= 8;
    311             *bin_data++ = (leftchar >> leftbits) & 0xff;
    312             leftchar &= ((1 << leftbits) - 1);
    313             bin_len--;
    314         }
    315     }
    316     /*
    317     ** Finally, check that if there's anything left on the line
    318     ** that it's whitespace only.
    319     */
    320     while( ascii_len-- > 0 ) {
    321         this_ch = *ascii_data++;
    322         /* Extra '`' may be written as padding in some cases */
    323         if ( this_ch != ' ' && this_ch != ' '+64 &&
    324              this_ch != '\n' && this_ch != '\r' ) {
    325             PyErr_SetString(Error, "Trailing garbage");
    326             Py_DECREF(rv);
    327             return NULL;
    328         }
    329     }
    330     return rv;
    331 }
    332 
    333 /*[clinic input]
    334 binascii.b2a_uu
    335 
    336     data: Py_buffer
    337     /
    338 
    339 Uuencode line of data.
    340 [clinic start generated code]*/
    341 
    342 static PyObject *
    343 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data)
    344 /*[clinic end generated code: output=0070670e52e4aa6b input=00fdf458ce8b465b]*/
    345 {
    346     unsigned char *ascii_data;
    347     const unsigned char *bin_data;
    348     int leftbits = 0;
    349     unsigned char this_ch;
    350     unsigned int leftchar = 0;
    351     Py_ssize_t bin_len, out_len;
    352     _PyBytesWriter writer;
    353 
    354     _PyBytesWriter_Init(&writer);
    355     bin_data = data->buf;
    356     bin_len = data->len;
    357     if ( bin_len > 45 ) {
    358         /* The 45 is a limit that appears in all uuencode's */
    359         PyErr_SetString(Error, "At most 45 bytes at once");
    360         return NULL;
    361     }
    362 
    363     /* We're lazy and allocate to much (fixed up later) */
    364     out_len = 2 + (bin_len + 2) / 3 * 4;
    365     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
    366     if (ascii_data == NULL)
    367         return NULL;
    368 
    369     /* Store the length */
    370     *ascii_data++ = ' ' + (bin_len & 077);
    371 
    372     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
    373         /* Shift the data (or padding) into our buffer */
    374         if ( bin_len > 0 )              /* Data */
    375             leftchar = (leftchar << 8) | *bin_data;
    376         else                            /* Padding */
    377             leftchar <<= 8;
    378         leftbits += 8;
    379 
    380         /* See if there are 6-bit groups ready */
    381         while ( leftbits >= 6 ) {
    382             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
    383             leftbits -= 6;
    384             *ascii_data++ = this_ch + ' ';
    385         }
    386     }
    387     *ascii_data++ = '\n';       /* Append a courtesy newline */
    388 
    389     return _PyBytesWriter_Finish(&writer, ascii_data);
    390 }
    391 
    392 
    393 static int
    394 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
    395 {
    396     /* Finds & returns the (num+1)th
    397     ** valid character for base64, or -1 if none.
    398     */
    399 
    400     int ret = -1;
    401     unsigned char c, b64val;
    402 
    403     while ((slen > 0) && (ret == -1)) {
    404         c = *s;
    405         b64val = table_a2b_base64[c & 0x7f];
    406         if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
    407             if (num == 0)
    408                 ret = *s;
    409             num--;
    410         }
    411 
    412         s++;
    413         slen--;
    414     }
    415     return ret;
    416 }
    417 
    418 /*[clinic input]
    419 binascii.a2b_base64
    420 
    421     data: ascii_buffer
    422     /
    423 
    424 Decode a line of base64 data.
    425 [clinic start generated code]*/
    426 
    427 static PyObject *
    428 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
    429 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
    430 {
    431     const unsigned char *ascii_data;
    432     unsigned char *bin_data;
    433     int leftbits = 0;
    434     unsigned char this_ch;
    435     unsigned int leftchar = 0;
    436     Py_ssize_t ascii_len, bin_len;
    437     int quad_pos = 0;
    438     _PyBytesWriter writer;
    439 
    440     ascii_data = data->buf;
    441     ascii_len = data->len;
    442 
    443     assert(ascii_len >= 0);
    444 
    445     if (ascii_len > PY_SSIZE_T_MAX - 3)
    446         return PyErr_NoMemory();
    447 
    448     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
    449 
    450     _PyBytesWriter_Init(&writer);
    451 
    452     /* Allocate the buffer */
    453     bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
    454     if (bin_data == NULL)
    455         return NULL;
    456 
    457     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
    458         this_ch = *ascii_data;
    459 
    460         if (this_ch > 0x7f ||
    461             this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
    462             continue;
    463 
    464         /* Check for pad sequences and ignore
    465         ** the invalid ones.
    466         */
    467         if (this_ch == BASE64_PAD) {
    468             if ( (quad_pos < 2) ||
    469                  ((quad_pos == 2) &&
    470                   (binascii_find_valid(ascii_data, ascii_len, 1)
    471                    != BASE64_PAD)) )
    472             {
    473                 continue;
    474             }
    475             else {
    476                 /* A pad sequence means no more input.
    477                 ** We've already interpreted the data
    478                 ** from the quad at this point.
    479                 */
    480                 leftbits = 0;
    481                 break;
    482             }
    483         }
    484 
    485         this_ch = table_a2b_base64[*ascii_data];
    486         if ( this_ch == (unsigned char) -1 )
    487             continue;
    488 
    489         /*
    490         ** Shift it in on the low end, and see if there's
    491         ** a byte ready for output.
    492         */
    493         quad_pos = (quad_pos + 1) & 0x03;
    494         leftchar = (leftchar << 6) | (this_ch);
    495         leftbits += 6;
    496 
    497         if ( leftbits >= 8 ) {
    498             leftbits -= 8;
    499             *bin_data++ = (leftchar >> leftbits) & 0xff;
    500             leftchar &= ((1 << leftbits) - 1);
    501         }
    502     }
    503 
    504     if (leftbits != 0) {
    505         PyErr_SetString(Error, "Incorrect padding");
    506         _PyBytesWriter_Dealloc(&writer);
    507         return NULL;
    508     }
    509 
    510     return _PyBytesWriter_Finish(&writer, bin_data);
    511 }
    512 
    513 
    514 /*[clinic input]
    515 binascii.b2a_base64
    516 
    517     data: Py_buffer
    518     *
    519     newline: int(c_default="1") = True
    520 
    521 Base64-code line of data.
    522 [clinic start generated code]*/
    523 
    524 static PyObject *
    525 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
    526 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=7b2ea6fa38d8924c]*/
    527 {
    528     unsigned char *ascii_data;
    529     const unsigned char *bin_data;
    530     int leftbits = 0;
    531     unsigned char this_ch;
    532     unsigned int leftchar = 0;
    533     Py_ssize_t bin_len, out_len;
    534     _PyBytesWriter writer;
    535 
    536     bin_data = data->buf;
    537     bin_len = data->len;
    538     _PyBytesWriter_Init(&writer);
    539 
    540     assert(bin_len >= 0);
    541 
    542     if ( bin_len > BASE64_MAXBIN ) {
    543         PyErr_SetString(Error, "Too much data for base64 line");
    544         return NULL;
    545     }
    546 
    547     /* We're lazy and allocate too much (fixed up later).
    548        "+2" leaves room for up to two pad characters.
    549        Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
    550     out_len = bin_len*2 + 2;
    551     if (newline)
    552         out_len++;
    553     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
    554     if (ascii_data == NULL)
    555         return NULL;
    556 
    557     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
    558         /* Shift the data into our buffer */
    559         leftchar = (leftchar << 8) | *bin_data;
    560         leftbits += 8;
    561 
    562         /* See if there are 6-bit groups ready */
    563         while ( leftbits >= 6 ) {
    564             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
    565             leftbits -= 6;
    566             *ascii_data++ = table_b2a_base64[this_ch];
    567         }
    568     }
    569     if ( leftbits == 2 ) {
    570         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
    571         *ascii_data++ = BASE64_PAD;
    572         *ascii_data++ = BASE64_PAD;
    573     } else if ( leftbits == 4 ) {
    574         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
    575         *ascii_data++ = BASE64_PAD;
    576     }
    577     if (newline)
    578         *ascii_data++ = '\n';       /* Append a courtesy newline */
    579 
    580     return _PyBytesWriter_Finish(&writer, ascii_data);
    581 }
    582 
    583 /*[clinic input]
    584 binascii.a2b_hqx
    585 
    586     data: ascii_buffer
    587     /
    588 
    589 Decode .hqx coding.
    590 [clinic start generated code]*/
    591 
    592 static PyObject *
    593 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
    594 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
    595 {
    596     const unsigned char *ascii_data;
    597     unsigned char *bin_data;
    598     int leftbits = 0;
    599     unsigned char this_ch;
    600     unsigned int leftchar = 0;
    601     PyObject *res;
    602     Py_ssize_t len;
    603     int done = 0;
    604     _PyBytesWriter writer;
    605 
    606     ascii_data = data->buf;
    607     len = data->len;
    608     _PyBytesWriter_Init(&writer);
    609 
    610     assert(len >= 0);
    611 
    612     if (len > PY_SSIZE_T_MAX - 2)
    613         return PyErr_NoMemory();
    614 
    615     /* Allocate a string that is too big (fixed later)
    616        Add two to the initial length to prevent interning which
    617        would preclude subsequent resizing.  */
    618     bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
    619     if (bin_data == NULL)
    620         return NULL;
    621 
    622     for( ; len > 0 ; len--, ascii_data++ ) {
    623         /* Get the byte and look it up */
    624         this_ch = table_a2b_hqx[*ascii_data];
    625         if ( this_ch == SKIP )
    626             continue;
    627         if ( this_ch == FAIL ) {
    628             PyErr_SetString(Error, "Illegal char");
    629             _PyBytesWriter_Dealloc(&writer);
    630             return NULL;
    631         }
    632         if ( this_ch == DONE ) {
    633             /* The terminating colon */
    634             done = 1;
    635             break;
    636         }
    637 
    638         /* Shift it into the buffer and see if any bytes are ready */
    639         leftchar = (leftchar << 6) | (this_ch);
    640         leftbits += 6;
    641         if ( leftbits >= 8 ) {
    642             leftbits -= 8;
    643             *bin_data++ = (leftchar >> leftbits) & 0xff;
    644             leftchar &= ((1 << leftbits) - 1);
    645         }
    646     }
    647 
    648     if ( leftbits && !done ) {
    649         PyErr_SetString(Incomplete,
    650                         "String has incomplete number of bytes");
    651         _PyBytesWriter_Dealloc(&writer);
    652         return NULL;
    653     }
    654 
    655     res = _PyBytesWriter_Finish(&writer, bin_data);
    656     if (res == NULL)
    657         return NULL;
    658     return Py_BuildValue("Ni", res, done);
    659 }
    660 
    661 
    662 /*[clinic input]
    663 binascii.rlecode_hqx
    664 
    665     data: Py_buffer
    666     /
    667 
    668 Binhex RLE-code binary data.
    669 [clinic start generated code]*/
    670 
    671 static PyObject *
    672 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
    673 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
    674 {
    675     const unsigned char *in_data;
    676     unsigned char *out_data;
    677     unsigned char ch;
    678     Py_ssize_t in, inend, len;
    679     _PyBytesWriter writer;
    680 
    681     _PyBytesWriter_Init(&writer);
    682     in_data = data->buf;
    683     len = data->len;
    684 
    685     assert(len >= 0);
    686 
    687     if (len > PY_SSIZE_T_MAX / 2 - 2)
    688         return PyErr_NoMemory();
    689 
    690     /* Worst case: output is twice as big as input (fixed later) */
    691     out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
    692     if (out_data == NULL)
    693         return NULL;
    694 
    695     for( in=0; in<len; in++) {
    696         ch = in_data[in];
    697         if ( ch == RUNCHAR ) {
    698             /* RUNCHAR. Escape it. */
    699             *out_data++ = RUNCHAR;
    700             *out_data++ = 0;
    701         } else {
    702             /* Check how many following are the same */
    703             for(inend=in+1;
    704                 inend<len && in_data[inend] == ch &&
    705                     inend < in+255;
    706                 inend++) ;
    707             if ( inend - in > 3 ) {
    708                 /* More than 3 in a row. Output RLE. */
    709                 *out_data++ = ch;
    710                 *out_data++ = RUNCHAR;
    711                 *out_data++ = (unsigned char) (inend-in);
    712                 in = inend-1;
    713             } else {
    714                 /* Less than 3. Output the byte itself */
    715                 *out_data++ = ch;
    716             }
    717         }
    718     }
    719 
    720     return _PyBytesWriter_Finish(&writer, out_data);
    721 }
    722 
    723 
    724 /*[clinic input]
    725 binascii.b2a_hqx
    726 
    727     data: Py_buffer
    728     /
    729 
    730 Encode .hqx data.
    731 [clinic start generated code]*/
    732 
    733 static PyObject *
    734 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
    735 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
    736 {
    737     unsigned char *ascii_data;
    738     const unsigned char *bin_data;
    739     int leftbits = 0;
    740     unsigned char this_ch;
    741     unsigned int leftchar = 0;
    742     Py_ssize_t len;
    743     _PyBytesWriter writer;
    744 
    745     bin_data = data->buf;
    746     len = data->len;
    747     _PyBytesWriter_Init(&writer);
    748 
    749     assert(len >= 0);
    750 
    751     if (len > PY_SSIZE_T_MAX / 2 - 2)
    752         return PyErr_NoMemory();
    753 
    754     /* Allocate a buffer that is at least large enough */
    755     ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
    756     if (ascii_data == NULL)
    757         return NULL;
    758 
    759     for( ; len > 0 ; len--, bin_data++ ) {
    760         /* Shift into our buffer, and output any 6bits ready */
    761         leftchar = (leftchar << 8) | *bin_data;
    762         leftbits += 8;
    763         while ( leftbits >= 6 ) {
    764             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
    765             leftbits -= 6;
    766             *ascii_data++ = table_b2a_hqx[this_ch];
    767         }
    768     }
    769     /* Output a possible runt byte */
    770     if ( leftbits ) {
    771         leftchar <<= (6-leftbits);
    772         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
    773     }
    774 
    775     return _PyBytesWriter_Finish(&writer, ascii_data);
    776 }
    777 
    778 
    779 /*[clinic input]
    780 binascii.rledecode_hqx
    781 
    782     data: Py_buffer
    783     /
    784 
    785 Decode hexbin RLE-coded string.
    786 [clinic start generated code]*/
    787 
    788 static PyObject *
    789 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
    790 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
    791 {
    792     const unsigned char *in_data;
    793     unsigned char *out_data;
    794     unsigned char in_byte, in_repeat;
    795     Py_ssize_t in_len;
    796     _PyBytesWriter writer;
    797 
    798     in_data = data->buf;
    799     in_len = data->len;
    800     _PyBytesWriter_Init(&writer);
    801 
    802     assert(in_len >= 0);
    803 
    804     /* Empty string is a special case */
    805     if ( in_len == 0 )
    806         return PyBytes_FromStringAndSize("", 0);
    807     else if (in_len > PY_SSIZE_T_MAX / 2)
    808         return PyErr_NoMemory();
    809 
    810     /* Allocate a buffer of reasonable size. Resized when needed */
    811     out_data = _PyBytesWriter_Alloc(&writer, in_len);
    812     if (out_data == NULL)
    813         return NULL;
    814 
    815     /* Use overallocation */
    816     writer.overallocate = 1;
    817 
    818     /*
    819     ** We need two macros here to get/put bytes and handle
    820     ** end-of-buffer for input and output strings.
    821     */
    822 #define INBYTE(b)                                                       \
    823     do {                                                                \
    824          if ( --in_len < 0 ) {                                          \
    825            PyErr_SetString(Incomplete, "");                             \
    826            goto error;                                                  \
    827          }                                                              \
    828          b = *in_data++;                                                \
    829     } while(0)
    830 
    831     /*
    832     ** Handle first byte separately (since we have to get angry
    833     ** in case of an orphaned RLE code).
    834     */
    835     INBYTE(in_byte);
    836 
    837     if (in_byte == RUNCHAR) {
    838         INBYTE(in_repeat);
    839         /* only 1 byte will be written, but 2 bytes were preallocated:
    840            subtract 1 byte to prevent overallocation */
    841         writer.min_size--;
    842 
    843         if (in_repeat != 0) {
    844             /* Note Error, not Incomplete (which is at the end
    845             ** of the string only). This is a programmer error.
    846             */
    847             PyErr_SetString(Error, "Orphaned RLE code at start");
    848             goto error;
    849         }
    850         *out_data++ = RUNCHAR;
    851     } else {
    852         *out_data++ = in_byte;
    853     }
    854 
    855     while( in_len > 0 ) {
    856         INBYTE(in_byte);
    857 
    858         if (in_byte == RUNCHAR) {
    859             INBYTE(in_repeat);
    860             /* only 1 byte will be written, but 2 bytes were preallocated:
    861                subtract 1 byte to prevent overallocation */
    862             writer.min_size--;
    863 
    864             if ( in_repeat == 0 ) {
    865                 /* Just an escaped RUNCHAR value */
    866                 *out_data++ = RUNCHAR;
    867             } else {
    868                 /* Pick up value and output a sequence of it */
    869                 in_byte = out_data[-1];
    870 
    871                 /* enlarge the buffer if needed */
    872                 if (in_repeat > 1) {
    873                     /* -1 because we already preallocated 1 byte */
    874                     out_data = _PyBytesWriter_Prepare(&writer, out_data,
    875                                                       in_repeat - 1);
    876                     if (out_data == NULL)
    877                         goto error;
    878                 }
    879 
    880                 while ( --in_repeat > 0 )
    881                     *out_data++ = in_byte;
    882             }
    883         } else {
    884             /* Normal byte */
    885             *out_data++ = in_byte;
    886         }
    887     }
    888     return _PyBytesWriter_Finish(&writer, out_data);
    889 
    890 error:
    891     _PyBytesWriter_Dealloc(&writer);
    892     return NULL;
    893 }
    894 
    895 
    896 /*[clinic input]
    897 binascii.crc_hqx -> unsigned_int
    898 
    899     data: Py_buffer
    900     crc: unsigned_int(bitwise=True)
    901     /
    902 
    903 Compute CRC-CCITT incrementally.
    904 [clinic start generated code]*/
    905 
    906 static unsigned int
    907 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
    908 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
    909 {
    910     const unsigned char *bin_data;
    911     Py_ssize_t len;
    912 
    913     crc &= 0xffff;
    914     bin_data = data->buf;
    915     len = data->len;
    916 
    917     while(len-- > 0) {
    918         crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
    919     }
    920 
    921     return crc;
    922 }
    923 
    924 #ifndef USE_ZLIB_CRC32
    925 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
    926     Also known as: ISO 3307
    927 **********************************************************************|
    928 *                                                                    *|
    929 * Demonstration program to compute the 32-bit CRC used as the frame  *|
    930 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
    931 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
    932 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
    933 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
    934 * this polynomial is or will be included in CCITT V.41, which        *|
    935 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
    936 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
    937 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
    938 *                                                                    *|
    939 **********************************************************************|
    940 
    941  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
    942  code or tables extracted from it, as desired without restriction.
    943 
    944  First, the polynomial itself and its table of feedback terms.  The
    945  polynomial is
    946  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
    947  Note that we take it "backwards" and put the highest-order term in
    948  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
    949  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
    950  the MSB being 1.
    951 
    952  Note that the usual hardware shift register implementation, which
    953  is what we're using (we're merely optimizing it by doing eight-bit
    954  chunks at a time) shifts bits into the lowest-order term.  In our
    955  implementation, that means shifting towards the right.  Why do we
    956  do it this way?  Because the calculated CRC must be transmitted in
    957  order from highest-order term to lowest-order term.  UARTs transmit
    958  characters in order from LSB to MSB.  By storing the CRC this way,
    959  we hand it to the UART in the order low-byte to high-byte; the UART
    960  sends each low-bit to high-bit; and the result is transmission bit
    961  by bit from highest- to lowest-order term without requiring any bit
    962  shuffling on our part.  Reception works similarly.
    963 
    964  The feedback terms table consists of 256, 32-bit entries.  Notes:
    965 
    966   1. The table can be generated at runtime if desired; code to do so
    967      is shown later.  It might not be obvious, but the feedback
    968      terms simply represent the results of eight shift/xor opera-
    969      tions for all combinations of data and CRC register values.
    970 
    971   2. The CRC accumulation logic is the same for all CRC polynomials,
    972      be they sixteen or thirty-two bits wide.  You simply choose the
    973      appropriate table.  Alternatively, because the table can be
    974      generated at runtime, you can start by generating the table for
    975      the polynomial in question and use exactly the same "updcrc",
    976      if your application needn't simultaneously handle two CRC
    977      polynomials.  (Note, however, that XMODEM is strange.)
    978 
    979   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
    980      of course, 32-bit entries work OK if the high 16 bits are zero.
    981 
    982   4. The values must be right-shifted by eight bits by the "updcrc"
    983      logic; the shift must be unsigned (bring in zeroes).  On some
    984      hardware you could probably optimize the shift in assembler by
    985      using byte-swap instructions.
    986 ********************************************************************/
    987 
    /* CRC-32 feedback terms, one entry per possible byte value (256 in all).
     * Laid out four per row; the comment on each row is the index of its
     * first entry.  Entry 0x80 is the bit-reversed polynomial 0xedb88320. */
    static const unsigned int crc_32_tab[256] = {
        /* 0x00 */ 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,
        /* 0x04 */ 0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,
        /* 0x08 */ 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
        /* 0x0c */ 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,
        /* 0x10 */ 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
        /* 0x14 */ 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
        /* 0x18 */ 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,
        /* 0x1c */ 0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,
        /* 0x20 */ 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
        /* 0x24 */ 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
        /* 0x28 */ 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,
        /* 0x2c */ 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
        /* 0x30 */ 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,
        /* 0x34 */ 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,
        /* 0x38 */ 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
        /* 0x3c */ 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,
        /* 0x40 */ 0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,
        /* 0x44 */ 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
        /* 0x48 */ 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,
        /* 0x4c */ 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
        /* 0x50 */ 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
        /* 0x54 */ 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,
        /* 0x58 */ 0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,
        /* 0x5c */ 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
        /* 0x60 */ 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
        /* 0x64 */ 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,
        /* 0x68 */ 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
        /* 0x6c */ 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,
        /* 0x70 */ 0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,
        /* 0x74 */ 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
        /* 0x78 */ 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,
        /* 0x7c */ 0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,
        /* 0x80 */ 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
        /* 0x84 */ 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,
        /* 0x88 */ 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
        /* 0x8c */ 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
        /* 0x90 */ 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,
        /* 0x94 */ 0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,
        /* 0x98 */ 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
        /* 0x9c */ 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
        /* 0xa0 */ 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,
        /* 0xa4 */ 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
        /* 0xa8 */ 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,
        /* 0xac */ 0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,
        /* 0xb0 */ 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
        /* 0xb4 */ 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,
        /* 0xb8 */ 0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,
        /* 0xbc */ 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
        /* 0xc0 */ 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,
        /* 0xc4 */ 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
        /* 0xc8 */ 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
        /* 0xcc */ 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,
        /* 0xd0 */ 0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,
        /* 0xd4 */ 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
        /* 0xd8 */ 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
        /* 0xdc */ 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,
        /* 0xe0 */ 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
        /* 0xe4 */ 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,
        /* 0xe8 */ 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,
        /* 0xec */ 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
        /* 0xf0 */ 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,
        /* 0xf4 */ 0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,
        /* 0xf8 */ 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
        /* 0xfc */ 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
    };
   1042 #endif  /* USE_ZLIB_CRC32 */
   1043 
   1044 /*[clinic input]
   1045 binascii.crc32 -> unsigned_int
   1046 
   1047     data: Py_buffer
   1048     crc: unsigned_int(bitwise=True) = 0
   1049     /
   1050 
   1051 Compute CRC-32 incrementally.
   1052 [clinic start generated code]*/
   1053 
   1054 static unsigned int
   1055 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
   1056 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
   1057 
   1058 #ifdef USE_ZLIB_CRC32
   1059 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
   1060 {
   1061     const Byte *buf;
   1062     Py_ssize_t len;
   1063     int signed_val;
   1064 
   1065     buf = (Byte*)data->buf;
   1066     len = data->len;
   1067     signed_val = crc32(crc, buf, len);
   1068     return (unsigned int)signed_val & 0xffffffffU;
   1069 }
   1070 #else  /* USE_ZLIB_CRC32 */
   1071 { /* By Jim Ahlstrom; All rights transferred to CNRI */
   1072     const unsigned char *bin_data;
   1073     Py_ssize_t len;
   1074     unsigned int result;
   1075 
   1076     bin_data = data->buf;
   1077     len = data->len;
   1078 
   1079     crc = ~ crc;
   1080     while (len-- > 0) {
   1081         crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
   1082         /* Note:  (crc >> 8) MUST zero fill on left */
   1083     }
   1084 
   1085     result = (crc ^ 0xFFFFFFFF);
   1086     return result & 0xffffffff;
   1087 }
   1088 #endif  /* USE_ZLIB_CRC32 */
   1089 
   1090 /*[clinic input]
   1091 binascii.b2a_hex
   1092 
   1093     data: Py_buffer
   1094     /
   1095 
   1096 Hexadecimal representation of binary data.
   1097 
   1098 The return value is a bytes object.  This function is also
   1099 available as "hexlify()".
   1100 [clinic start generated code]*/
   1101 
   1102 static PyObject *
   1103 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
   1104 /*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
   1105 {
   1106     return _Py_strhex_bytes((const char *)data->buf, data->len);
   1107 }
   1108 
   1109 /*[clinic input]
   1110 binascii.hexlify = binascii.b2a_hex
   1111 
   1112 Hexadecimal representation of binary data.
   1113 
   1114 The return value is a bytes object.
   1115 [clinic start generated code]*/
   1116 
   1117 static PyObject *
   1118 binascii_hexlify_impl(PyObject *module, Py_buffer *data)
   1119 /*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
   1120 {
   1121     return _Py_strhex_bytes((const char *)data->buf, data->len);
   1122 }
   1123 
   /* Return the value (0..15) of the hexadecimal digit `c`, accepting both
    * upper and lower case, or -1 when `c` is not a hexadecimal digit.
    *
    * Plain range checks are used on purpose: they look at the whole int, so a
    * value outside 0..255 is rejected rather than being judged by its low
    * byte only. */
   static int
   to_int(int c)
   {
       if (c >= '0' && c <= '9')
           return c - '0';
       if (c >= 'a' && c <= 'f')
           return c - 'a' + 10;
       if (c >= 'A' && c <= 'F')
           return c - 'A' + 10;
       return -1;
   }
   1137 
   1138 
   1139 /*[clinic input]
   1140 binascii.a2b_hex
   1141 
   1142     hexstr: ascii_buffer
   1143     /
   1144 
   1145 Binary data of hexadecimal representation.
   1146 
   1147 hexstr must contain an even number of hex digits (upper or lower case).
   1148 This function is also available as "unhexlify()".
   1149 [clinic start generated code]*/
   1150 
   1151 static PyObject *
   1152 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
   1153 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
   1154 {
   1155     const char* argbuf;
   1156     Py_ssize_t arglen;
   1157     PyObject *retval;
   1158     char* retbuf;
   1159     Py_ssize_t i, j;
   1160 
   1161     argbuf = hexstr->buf;
   1162     arglen = hexstr->len;
   1163 
   1164     assert(arglen >= 0);
   1165 
   1166     /* XXX What should we do about strings with an odd length?  Should
   1167      * we add an implicit leading zero, or a trailing zero?  For now,
   1168      * raise an exception.
   1169      */
   1170     if (arglen % 2) {
   1171         PyErr_SetString(Error, "Odd-length string");
   1172         return NULL;
   1173     }
   1174 
   1175     retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
   1176     if (!retval)
   1177         return NULL;
   1178     retbuf = PyBytes_AS_STRING(retval);
   1179 
   1180     for (i=j=0; i < arglen; i += 2) {
   1181         int top = to_int(Py_CHARMASK(argbuf[i]));
   1182         int bot = to_int(Py_CHARMASK(argbuf[i+1]));
   1183         if (top == -1 || bot == -1) {
   1184             PyErr_SetString(Error,
   1185                             "Non-hexadecimal digit found");
   1186             goto finally;
   1187         }
   1188         retbuf[j++] = (top << 4) + bot;
   1189     }
   1190     return retval;
   1191 
   1192   finally:
   1193     Py_DECREF(retval);
   1194     return NULL;
   1195 }
   1196 
   1197 /*[clinic input]
   1198 binascii.unhexlify = binascii.a2b_hex
   1199 
   1200 Binary data of hexadecimal representation.
   1201 
   1202 hexstr must contain an even number of hex digits (upper or lower case).
   1203 [clinic start generated code]*/
   1204 
   1205 static PyObject *
   1206 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
   1207 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
   1208 {
   1209     return binascii_a2b_hex_impl(module, hexstr);
   1210 }
   1211 
   /* Value of each byte when read as a hexadecimal digit, or -1 when the
    * byte is not one.  The table has an entry for every possible byte so
    * that a lookup can never run past its end. */
   static const int table_hex[256] = {
     /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
     /* 0x40 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x60 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0xa0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0xb0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0xc0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0xd0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0xe0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     /* 0xf0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
   };

   /* Look up the hexadecimal value of byte `c` (-1 if it is not a hex digit).
    * The index is narrowed to unsigned char so it always falls inside the
    * table; the expansion is parenthesized so it composes in any expression. */
   #define hexval(c) (table_hex[(unsigned char)(c)])

   /* Line-length limit applied by the quoted-printable encoder below. */
   #define MAXLINESIZE 76
   1226 
   1227 
   1228 /*[clinic input]
   1229 binascii.a2b_qp
   1230 
   1231     data: ascii_buffer
   1232     header: int(c_default="0") = False
   1233 
   1234 Decode a string of qp-encoded data.
   1235 [clinic start generated code]*/
   1236 
   1237 static PyObject *
   1238 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
   1239 /*[clinic end generated code: output=e99f7846cfb9bc53 input=5187a0d3d8e54f3b]*/
   1240 {
   1241     Py_ssize_t in, out;
   1242     char ch;
   1243     const unsigned char *ascii_data;
   1244     unsigned char *odata;
   1245     Py_ssize_t datalen = 0;
   1246     PyObject *rv;
   1247 
   1248     ascii_data = data->buf;
   1249     datalen = data->len;
   1250 
   1251     /* We allocate the output same size as input, this is overkill.
   1252      * The previous implementation used calloc() so we'll zero out the
   1253      * memory here too, since PyMem_Malloc() does not guarantee that.
   1254      */
   1255     odata = (unsigned char *) PyMem_Malloc(datalen);
   1256     if (odata == NULL) {
   1257         PyErr_NoMemory();
   1258         return NULL;
   1259     }
   1260     memset(odata, 0, datalen);
   1261 
   1262     in = out = 0;
   1263     while (in < datalen) {
   1264         if (ascii_data[in] == '=') {
   1265             in++;
   1266             if (in >= datalen) break;
   1267             /* Soft line breaks */
   1268             if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
   1269                 if (ascii_data[in] != '\n') {
   1270                     while (in < datalen && ascii_data[in] != '\n') in++;
   1271                 }
   1272                 if (in < datalen) in++;
   1273             }
   1274             else if (ascii_data[in] == '=') {
   1275                 /* broken case from broken python qp */
   1276                 odata[out++] = '=';
   1277                 in++;
   1278             }
   1279             else if ((in + 1 < datalen) &&
   1280                      ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
   1281                       (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
   1282                       (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
   1283                      ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
   1284                       (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
   1285                       (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
   1286                 /* hexval */
   1287                 ch = hexval(ascii_data[in]) << 4;
   1288                 in++;
   1289                 ch |= hexval(ascii_data[in]);
   1290                 in++;
   1291                 odata[out++] = ch;
   1292             }
   1293             else {
   1294               odata[out++] = '=';
   1295             }
   1296         }
   1297         else if (header && ascii_data[in] == '_') {
   1298             odata[out++] = ' ';
   1299             in++;
   1300         }
   1301         else {
   1302             odata[out] = ascii_data[in];
   1303             in++;
   1304             out++;
   1305         }
   1306     }
   1307     if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
   1308         PyMem_Free(odata);
   1309         return NULL;
   1310     }
   1311     PyMem_Free(odata);
   1312     return rv;
   1313 }
   1314 
   /* Store the two upper-case hexadecimal digits of `ch` in s[0] (high
    * nibble) and s[1] (low nibble).  No terminator is written.  Always
    * returns 0. */
   static int
   to_hex (unsigned char ch, unsigned char *s)
   {
       static const char hexdigits[] = "0123456789ABCDEF";

       s[0] = hexdigits[(ch >> 4) & 0x0f];
       s[1] = hexdigits[ch & 0x0f];
       return 0;
   }
   1325 
   1326 /* XXX: This is ridiculously complicated to be backward compatible
   1327  * (mostly) with the quopri module.  It doesn't re-create the quopri
   1328  * module bug where text ending in CRLF has the CR encoded */
   1329 
   1330 /*[clinic input]
   1331 binascii.b2a_qp
   1332 
   1333     data: Py_buffer
   1334     quotetabs: int(c_default="0") = False
   1335     istext: int(c_default="1") = True
   1336     header: int(c_default="0") = False
   1337 
   1338 Encode a string using quoted-printable encoding.
   1339 
   1340 On encoding, when istext is set, newlines are not encoded, and white
   1341 space at end of lines is.  When istext is not set, \r and \n (CR/LF)
   1342 are both encoded.  When quotetabs is set, space and tabs are encoded.
   1343 [clinic start generated code]*/
   1344 
   1345 static PyObject *
   1346 binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
   1347                      int istext, int header)
   1348 /*[clinic end generated code: output=e9884472ebb1a94c input=7f2a9aaa008e92b2]*/
   1349 {
   1350     Py_ssize_t in, out;
   1351     const unsigned char *databuf;
   1352     unsigned char *odata;
   1353     Py_ssize_t datalen = 0, odatalen = 0;
   1354     PyObject *rv;
   1355     unsigned int linelen = 0;
   1356     unsigned char ch;
   1357     int crlf = 0;
   1358     const unsigned char *p;
   1359 
   1360     databuf = data->buf;
   1361     datalen = data->len;
   1362 
   1363     /* See if this string is using CRLF line ends */
   1364     /* XXX: this function has the side effect of converting all of
   1365      * the end of lines to be the same depending on this detection
   1366      * here */
   1367     p = (const unsigned char *) memchr(databuf, '\n', datalen);
   1368     if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
   1369         crlf = 1;
   1370 
   1371     /* First, scan to see how many characters need to be encoded */
   1372     in = 0;
   1373     while (in < datalen) {
   1374         Py_ssize_t delta = 0;
   1375         if ((databuf[in] > 126) ||
   1376             (databuf[in] == '=') ||
   1377             (header && databuf[in] == '_') ||
   1378             ((databuf[in] == '.') && (linelen == 0) &&
   1379              (in + 1 == datalen || databuf[in+1] == '\n' ||
   1380               databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
   1381             (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
   1382             ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
   1383             ((databuf[in] < 33) &&
   1384              (databuf[in] != '\r') && (databuf[in] != '\n') &&
   1385              (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
   1386         {
   1387             if ((linelen + 3) >= MAXLINESIZE) {
   1388                 linelen = 0;
   1389                 if (crlf)
   1390                     delta += 3;
   1391                 else
   1392                     delta += 2;
   1393             }
   1394             linelen += 3;
   1395             delta += 3;
   1396             in++;
   1397         }
   1398         else {
   1399             if (istext &&
   1400                 ((databuf[in] == '\n') ||
   1401                  ((in+1 < datalen) && (databuf[in] == '\r') &&
   1402                  (databuf[in+1] == '\n'))))
   1403             {
   1404                 linelen = 0;
   1405                 /* Protect against whitespace on end of line */
   1406                 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
   1407                     delta += 2;
   1408                 if (crlf)
   1409                     delta += 2;
   1410                 else
   1411                     delta += 1;
   1412                 if (databuf[in] == '\r')
   1413                     in += 2;
   1414                 else
   1415                     in++;
   1416             }
   1417             else {
   1418                 if ((in + 1 != datalen) &&
   1419                     (databuf[in+1] != '\n') &&
   1420                     (linelen + 1) >= MAXLINESIZE) {
   1421                     linelen = 0;
   1422                     if (crlf)
   1423                         delta += 3;
   1424                     else
   1425                         delta += 2;
   1426                 }
   1427                 linelen++;
   1428                 delta++;
   1429                 in++;
   1430             }
   1431         }
   1432         if (PY_SSIZE_T_MAX - delta < odatalen) {
   1433             PyErr_NoMemory();
   1434             return NULL;
   1435         }
   1436         odatalen += delta;
   1437     }
   1438 
   1439     /* We allocate the output same size as input, this is overkill.
   1440      * The previous implementation used calloc() so we'll zero out the
   1441      * memory here too, since PyMem_Malloc() does not guarantee that.
   1442      */
   1443     odata = (unsigned char *) PyMem_Malloc(odatalen);
   1444     if (odata == NULL) {
   1445         PyErr_NoMemory();
   1446         return NULL;
   1447     }
   1448     memset(odata, 0, odatalen);
   1449 
   1450     in = out = linelen = 0;
   1451     while (in < datalen) {
   1452         if ((databuf[in] > 126) ||
   1453             (databuf[in] == '=') ||
   1454             (header && databuf[in] == '_') ||
   1455             ((databuf[in] == '.') && (linelen == 0) &&
   1456              (in + 1 == datalen || databuf[in+1] == '\n' ||
   1457               databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
   1458             (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
   1459             ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
   1460             ((databuf[in] < 33) &&
   1461              (databuf[in] != '\r') && (databuf[in] != '\n') &&
   1462              (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
   1463         {
   1464             if ((linelen + 3 )>= MAXLINESIZE) {
   1465                 odata[out++] = '=';
   1466                 if (crlf) odata[out++] = '\r';
   1467                 odata[out++] = '\n';
   1468                 linelen = 0;
   1469             }
   1470             odata[out++] = '=';
   1471             to_hex(databuf[in], &odata[out]);
   1472             out += 2;
   1473             in++;
   1474             linelen += 3;
   1475         }
   1476         else {
   1477             if (istext &&
   1478                 ((databuf[in] == '\n') ||
   1479                  ((in+1 < datalen) && (databuf[in] == '\r') &&
   1480                  (databuf[in+1] == '\n'))))
   1481             {
   1482                 linelen = 0;
   1483                 /* Protect against whitespace on end of line */
   1484                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
   1485                     ch = odata[out-1];
   1486                     odata[out-1] = '=';
   1487                     to_hex(ch, &odata[out]);
   1488                     out += 2;
   1489                 }
   1490 
   1491                 if (crlf) odata[out++] = '\r';
   1492                 odata[out++] = '\n';
   1493                 if (databuf[in] == '\r')
   1494                     in += 2;
   1495                 else
   1496                     in++;
   1497             }
   1498             else {
   1499                 if ((in + 1 != datalen) &&
   1500                     (databuf[in+1] != '\n') &&
   1501                     (linelen + 1) >= MAXLINESIZE) {
   1502                     odata[out++] = '=';
   1503                     if (crlf) odata[out++] = '\r';
   1504                     odata[out++] = '\n';
   1505                     linelen = 0;
   1506                 }
   1507                 linelen++;
   1508                 if (header && databuf[in] == ' ') {
   1509                     odata[out++] = '_';
   1510                     in++;
   1511                 }
   1512                 else {
   1513                     odata[out++] = databuf[in++];
   1514                 }
   1515             }
   1516         }
   1517     }
   1518     if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
   1519         PyMem_Free(odata);
   1520         return NULL;
   1521     }
   1522     PyMem_Free(odata);
   1523     return rv;
   1524 }
   1525 
   1526 /* List of functions defined in the module */
   1527 
   1528 static struct PyMethodDef binascii_module_methods[] = {
   1529     BINASCII_A2B_UU_METHODDEF
   1530     BINASCII_B2A_UU_METHODDEF
   1531     BINASCII_A2B_BASE64_METHODDEF
   1532     BINASCII_B2A_BASE64_METHODDEF
   1533     BINASCII_A2B_HQX_METHODDEF
   1534     BINASCII_B2A_HQX_METHODDEF
   1535     BINASCII_A2B_HEX_METHODDEF
   1536     BINASCII_B2A_HEX_METHODDEF
   1537     BINASCII_HEXLIFY_METHODDEF
   1538     BINASCII_UNHEXLIFY_METHODDEF
   1539     BINASCII_RLECODE_HQX_METHODDEF
   1540     BINASCII_RLEDECODE_HQX_METHODDEF
   1541     BINASCII_CRC_HQX_METHODDEF
   1542     BINASCII_CRC32_METHODDEF
   1543     BINASCII_A2B_QP_METHODDEF
   1544     BINASCII_B2A_QP_METHODDEF
   1545     {NULL, NULL}                             /* sentinel */
   1546 };
   1547 
   1548 
   1549 /* Initialization function for the module (*must* be called PyInit_binascii) */
   1550 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
   1551 
   1552 
   1553 static struct PyModuleDef binasciimodule = {
   1554     PyModuleDef_HEAD_INIT,
   1555     "binascii",
   1556     doc_binascii,
   1557     -1,
   1558     binascii_module_methods,
   1559     NULL,
   1560     NULL,
   1561     NULL,
   1562     NULL
   1563 };
   1564 
   1565 PyMODINIT_FUNC
   1566 PyInit_binascii(void)
   1567 {
   1568     PyObject *m, *d;
   1569 
   1570     /* Create the module and add the functions */
   1571     m = PyModule_Create(&binasciimodule);
   1572     if (m == NULL)
   1573         return NULL;
   1574 
   1575     d = PyModule_GetDict(m);
   1576 
   1577     Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
   1578     PyDict_SetItemString(d, "Error", Error);
   1579     Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
   1580     PyDict_SetItemString(d, "Incomplete", Incomplete);
   1581     if (PyErr_Occurred()) {
   1582         Py_DECREF(m);
   1583         m = NULL;
   1584     }
   1585     return m;
   1586 }
   1587