1 /* 2 ** Routines to represent binary data in ASCII and vice-versa 3 ** 4 ** This module currently supports the following encodings: 5 ** uuencode: 6 ** each line encodes 45 bytes (except possibly the last) 7 ** First char encodes (binary) length, rest data 8 ** each char encodes 6 bits, as follows: 9 ** binary: 01234567 abcdefgh ijklmnop 10 ** ascii: 012345 67abcd efghij klmnop 11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc. 12 ** short binary data is zero-extended (so the bits are always in the 13 ** right place), this does *not* reflect in the length. 14 ** base64: 15 ** Line breaks are insignificant, but lines are at most 76 chars 16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding 17 ** is done via a table. 18 ** Short binary data is filled (in ASCII) with '='. 19 ** hqx: 20 ** File starts with introductory text, real data starts and ends 21 ** with colons. 22 ** Data consists of three similar parts: info, datafork, resourcefork. 23 ** Each part is protected (at the end) with a 16-bit crc 24 ** The binary data is run-length encoded, and then ascii-fied: 25 ** binary: 01234567 abcdefgh ijklmnop 26 ** ascii: 012345 67abcd efghij klmnop 27 ** ASCII encoding is table-driven, see the code. 28 ** Short binary data results in the runt ascii-byte being output with 29 ** the bits in the right place. 30 ** 31 ** While I was reading dozens of programs that encode or decode the formats 32 ** here (documentation? hihi:-) I have formulated Jansen's Observation: 33 ** 34 ** Programs that encode binary data in ASCII are written in 35 ** such a style that they are as unreadable as possible. Devices used 36 ** include unnecessary global variables, burying important tables 37 ** in unrelated sourcefiles, putting functions in include files, 38 ** using seemingly-descriptive variable names for different purposes, 39 ** calls to empty subroutines and a host of others. 
40 ** 41 ** I have attempted to break with this tradition, but I guess that that 42 ** does make the performance sub-optimal. Oh well, too bad... 43 ** 44 ** Jack Jansen, CWI, July 1995. 45 ** 46 ** Added support for quoted-printable encoding, based on rfc 1521 et al 47 ** quoted-printable encoding specifies that non printable characters (anything 48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value 49 ** of the character. It also specifies some other behavior to enable 8bit data 50 ** in a mail message with little difficulty (maximum line sizes, protecting 51 ** some cases of whitespace, etc). 52 ** 53 ** Brandon Long, September 2001. 54 */ 55 56 #define PY_SSIZE_T_CLEAN 57 58 #include "Python.h" 59 #include "pystrhex.h" 60 #ifdef USE_ZLIB_CRC32 61 #include "zlib.h" 62 #endif 63 64 static PyObject *Error; 65 static PyObject *Incomplete; 66 67 /* 68 ** hqx lookup table, ascii->binary. 69 */ 70 71 #define RUNCHAR 0x90 72 73 #define DONE 0x7F 74 #define SKIP 0x7E 75 #define FAIL 0x7D 76 77 static const unsigned char table_a2b_hqx[256] = { 78 /* ^@ ^A ^B ^C ^D ^E ^F ^G */ 79 /* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 80 /* \b \t \n ^K ^L \r ^N ^O */ 81 /* 1*/ FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL, 82 /* ^P ^Q ^R ^S ^T ^U ^V ^W */ 83 /* 2*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 84 /* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */ 85 /* 3*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 86 /* ! " # $ % & ' */ 87 /* 4*/ FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 88 /* ( ) * + , - . / */ 89 /* 5*/ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL, 90 /* 0 1 2 3 4 5 6 7 */ 91 /* 6*/ 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL, 92 /* 8 9 : ; < = > ? 
*/ 93 /* 7*/ 0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL, 94 /* @ A B C D E F G */ 95 /* 8*/ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 96 /* H I J K L M N O */ 97 /* 9*/ 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL, 98 /* P Q R S T U V W */ 99 /*10*/ 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL, 100 /* X Y Z [ \ ] ^ _ */ 101 /*11*/ 0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL, 102 /* ` a b c d e f g */ 103 /*12*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL, 104 /* h i j k l m n o */ 105 /*13*/ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL, 106 /* p q r s t u v w */ 107 /*14*/ 0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL, 108 /* x y z { | } ~ ^? */ 109 /*15*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 110 /*16*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 111 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 112 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 113 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 114 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 115 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 116 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 117 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 118 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 119 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 120 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 121 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 122 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 123 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 124 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 125 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, 126 }; 127 128 static const unsigned char table_b2a_hqx[] = 129 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr"; 130 131 static const char table_a2b_base64[] = { 132 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 133 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 134 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, 135 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */ 
136 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 137 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 138 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 139 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 140 }; 141 142 #define BASE64_PAD '=' 143 144 /* Max binary chunk size; limited only by available memory */ 145 #define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2) 146 147 static const unsigned char table_b2a_base64[] = 148 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 149 150 151 152 static const unsigned short crctab_hqx[256] = { 153 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 154 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, 155 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, 156 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, 157 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, 158 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, 159 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, 160 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, 161 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, 162 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, 163 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, 164 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, 165 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, 166 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, 167 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, 168 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, 169 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, 170 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, 171 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, 172 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, 173 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 
0xc50d, 174 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 175 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, 176 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, 177 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, 178 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, 179 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, 180 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, 181 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, 182 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, 183 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 184 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0, 185 }; 186 187 /*[clinic input] 188 module binascii 189 [clinic start generated code]*/ 190 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/ 191 192 /*[python input] 193 194 class ascii_buffer_converter(CConverter): 195 type = 'Py_buffer' 196 converter = 'ascii_buffer_converter' 197 impl_by_reference = True 198 c_default = "{NULL, NULL}" 199 200 def cleanup(self): 201 name = self.name 202 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"]) 203 204 [python start generated code]*/ 205 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/ 206 207 static int 208 ascii_buffer_converter(PyObject *arg, Py_buffer *buf) 209 { 210 if (arg == NULL) { 211 PyBuffer_Release(buf); 212 return 1; 213 } 214 if (PyUnicode_Check(arg)) { 215 if (PyUnicode_READY(arg) < 0) 216 return 0; 217 if (!PyUnicode_IS_ASCII(arg)) { 218 PyErr_SetString(PyExc_ValueError, 219 "string argument should contain only ASCII characters"); 220 return 0; 221 } 222 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND); 223 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg); 224 buf->len = PyUnicode_GET_LENGTH(arg); 225 buf->obj = NULL; 226 return 1; 227 } 228 if (PyObject_GetBuffer(arg, buf, 
PyBUF_SIMPLE) != 0) { 229 PyErr_Format(PyExc_TypeError, 230 "argument should be bytes, buffer or ASCII string, " 231 "not '%.100s'", Py_TYPE(arg)->tp_name); 232 return 0; 233 } 234 if (!PyBuffer_IsContiguous(buf, 'C')) { 235 PyErr_Format(PyExc_TypeError, 236 "argument should be a contiguous buffer, " 237 "not '%.100s'", Py_TYPE(arg)->tp_name); 238 PyBuffer_Release(buf); 239 return 0; 240 } 241 return Py_CLEANUP_SUPPORTED; 242 } 243 244 #include "clinic/binascii.c.h" 245 246 /*[clinic input] 247 binascii.a2b_uu 248 249 data: ascii_buffer 250 / 251 252 Decode a line of uuencoded data. 253 [clinic start generated code]*/ 254 255 static PyObject * 256 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data) 257 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/ 258 { 259 const unsigned char *ascii_data; 260 unsigned char *bin_data; 261 int leftbits = 0; 262 unsigned char this_ch; 263 unsigned int leftchar = 0; 264 PyObject *rv; 265 Py_ssize_t ascii_len, bin_len; 266 267 ascii_data = data->buf; 268 ascii_len = data->len; 269 270 assert(ascii_len >= 0); 271 272 /* First byte: binary data length (in bytes) */ 273 bin_len = (*ascii_data++ - ' ') & 077; 274 ascii_len--; 275 276 /* Allocate the buffer */ 277 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) 278 return NULL; 279 bin_data = (unsigned char *)PyBytes_AS_STRING(rv); 280 281 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) { 282 /* XXX is it really best to add NULs if there's no more data */ 283 this_ch = (ascii_len > 0) ? *ascii_data : 0; 284 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) { 285 /* 286 ** Whitespace. Assume some spaces got eaten at 287 ** end-of-line. (We check this later) 288 */ 289 this_ch = 0; 290 } else { 291 /* Check the character for legality 292 ** The 64 in stead of the expected 63 is because 293 ** there are a few uuencodes out there that use 294 ** '`' as zero instead of space. 
295 */ 296 if ( this_ch < ' ' || this_ch > (' ' + 64)) { 297 PyErr_SetString(Error, "Illegal char"); 298 Py_DECREF(rv); 299 return NULL; 300 } 301 this_ch = (this_ch - ' ') & 077; 302 } 303 /* 304 ** Shift it in on the low end, and see if there's 305 ** a byte ready for output. 306 */ 307 leftchar = (leftchar << 6) | (this_ch); 308 leftbits += 6; 309 if ( leftbits >= 8 ) { 310 leftbits -= 8; 311 *bin_data++ = (leftchar >> leftbits) & 0xff; 312 leftchar &= ((1 << leftbits) - 1); 313 bin_len--; 314 } 315 } 316 /* 317 ** Finally, check that if there's anything left on the line 318 ** that it's whitespace only. 319 */ 320 while( ascii_len-- > 0 ) { 321 this_ch = *ascii_data++; 322 /* Extra '`' may be written as padding in some cases */ 323 if ( this_ch != ' ' && this_ch != ' '+64 && 324 this_ch != '\n' && this_ch != '\r' ) { 325 PyErr_SetString(Error, "Trailing garbage"); 326 Py_DECREF(rv); 327 return NULL; 328 } 329 } 330 return rv; 331 } 332 333 /*[clinic input] 334 binascii.b2a_uu 335 336 data: Py_buffer 337 / 338 339 Uuencode line of data. 
340 [clinic start generated code]*/ 341 342 static PyObject * 343 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data) 344 /*[clinic end generated code: output=0070670e52e4aa6b input=00fdf458ce8b465b]*/ 345 { 346 unsigned char *ascii_data; 347 const unsigned char *bin_data; 348 int leftbits = 0; 349 unsigned char this_ch; 350 unsigned int leftchar = 0; 351 Py_ssize_t bin_len, out_len; 352 _PyBytesWriter writer; 353 354 _PyBytesWriter_Init(&writer); 355 bin_data = data->buf; 356 bin_len = data->len; 357 if ( bin_len > 45 ) { 358 /* The 45 is a limit that appears in all uuencode's */ 359 PyErr_SetString(Error, "At most 45 bytes at once"); 360 return NULL; 361 } 362 363 /* We're lazy and allocate to much (fixed up later) */ 364 out_len = 2 + (bin_len + 2) / 3 * 4; 365 ascii_data = _PyBytesWriter_Alloc(&writer, out_len); 366 if (ascii_data == NULL) 367 return NULL; 368 369 /* Store the length */ 370 *ascii_data++ = ' ' + (bin_len & 077); 371 372 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) { 373 /* Shift the data (or padding) into our buffer */ 374 if ( bin_len > 0 ) /* Data */ 375 leftchar = (leftchar << 8) | *bin_data; 376 else /* Padding */ 377 leftchar <<= 8; 378 leftbits += 8; 379 380 /* See if there are 6-bit groups ready */ 381 while ( leftbits >= 6 ) { 382 this_ch = (leftchar >> (leftbits-6)) & 0x3f; 383 leftbits -= 6; 384 *ascii_data++ = this_ch + ' '; 385 } 386 } 387 *ascii_data++ = '\n'; /* Append a courtesy newline */ 388 389 return _PyBytesWriter_Finish(&writer, ascii_data); 390 } 391 392 393 static int 394 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num) 395 { 396 /* Finds & returns the (num+1)th 397 ** valid character for base64, or -1 if none. 
398 */ 399 400 int ret = -1; 401 unsigned char c, b64val; 402 403 while ((slen > 0) && (ret == -1)) { 404 c = *s; 405 b64val = table_a2b_base64[c & 0x7f]; 406 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) { 407 if (num == 0) 408 ret = *s; 409 num--; 410 } 411 412 s++; 413 slen--; 414 } 415 return ret; 416 } 417 418 /*[clinic input] 419 binascii.a2b_base64 420 421 data: ascii_buffer 422 / 423 424 Decode a line of base64 data. 425 [clinic start generated code]*/ 426 427 static PyObject * 428 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data) 429 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/ 430 { 431 const unsigned char *ascii_data; 432 unsigned char *bin_data; 433 int leftbits = 0; 434 unsigned char this_ch; 435 unsigned int leftchar = 0; 436 Py_ssize_t ascii_len, bin_len; 437 int quad_pos = 0; 438 _PyBytesWriter writer; 439 440 ascii_data = data->buf; 441 ascii_len = data->len; 442 443 assert(ascii_len >= 0); 444 445 if (ascii_len > PY_SSIZE_T_MAX - 3) 446 return PyErr_NoMemory(); 447 448 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ 449 450 _PyBytesWriter_Init(&writer); 451 452 /* Allocate the buffer */ 453 bin_data = _PyBytesWriter_Alloc(&writer, bin_len); 454 if (bin_data == NULL) 455 return NULL; 456 457 for( ; ascii_len > 0; ascii_len--, ascii_data++) { 458 this_ch = *ascii_data; 459 460 if (this_ch > 0x7f || 461 this_ch == '\r' || this_ch == '\n' || this_ch == ' ') 462 continue; 463 464 /* Check for pad sequences and ignore 465 ** the invalid ones. 466 */ 467 if (this_ch == BASE64_PAD) { 468 if ( (quad_pos < 2) || 469 ((quad_pos == 2) && 470 (binascii_find_valid(ascii_data, ascii_len, 1) 471 != BASE64_PAD)) ) 472 { 473 continue; 474 } 475 else { 476 /* A pad sequence means no more input. 477 ** We've already interpreted the data 478 ** from the quad at this point. 
479 */ 480 leftbits = 0; 481 break; 482 } 483 } 484 485 this_ch = table_a2b_base64[*ascii_data]; 486 if ( this_ch == (unsigned char) -1 ) 487 continue; 488 489 /* 490 ** Shift it in on the low end, and see if there's 491 ** a byte ready for output. 492 */ 493 quad_pos = (quad_pos + 1) & 0x03; 494 leftchar = (leftchar << 6) | (this_ch); 495 leftbits += 6; 496 497 if ( leftbits >= 8 ) { 498 leftbits -= 8; 499 *bin_data++ = (leftchar >> leftbits) & 0xff; 500 leftchar &= ((1 << leftbits) - 1); 501 } 502 } 503 504 if (leftbits != 0) { 505 PyErr_SetString(Error, "Incorrect padding"); 506 _PyBytesWriter_Dealloc(&writer); 507 return NULL; 508 } 509 510 return _PyBytesWriter_Finish(&writer, bin_data); 511 } 512 513 514 /*[clinic input] 515 binascii.b2a_base64 516 517 data: Py_buffer 518 * 519 newline: int(c_default="1") = True 520 521 Base64-code line of data. 522 [clinic start generated code]*/ 523 524 static PyObject * 525 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) 526 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=7b2ea6fa38d8924c]*/ 527 { 528 unsigned char *ascii_data; 529 const unsigned char *bin_data; 530 int leftbits = 0; 531 unsigned char this_ch; 532 unsigned int leftchar = 0; 533 Py_ssize_t bin_len, out_len; 534 _PyBytesWriter writer; 535 536 bin_data = data->buf; 537 bin_len = data->len; 538 _PyBytesWriter_Init(&writer); 539 540 assert(bin_len >= 0); 541 542 if ( bin_len > BASE64_MAXBIN ) { 543 PyErr_SetString(Error, "Too much data for base64 line"); 544 return NULL; 545 } 546 547 /* We're lazy and allocate too much (fixed up later). 548 "+2" leaves room for up to two pad characters. 549 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). 
*/ 550 out_len = bin_len*2 + 2; 551 if (newline) 552 out_len++; 553 ascii_data = _PyBytesWriter_Alloc(&writer, out_len); 554 if (ascii_data == NULL) 555 return NULL; 556 557 for( ; bin_len > 0 ; bin_len--, bin_data++ ) { 558 /* Shift the data into our buffer */ 559 leftchar = (leftchar << 8) | *bin_data; 560 leftbits += 8; 561 562 /* See if there are 6-bit groups ready */ 563 while ( leftbits >= 6 ) { 564 this_ch = (leftchar >> (leftbits-6)) & 0x3f; 565 leftbits -= 6; 566 *ascii_data++ = table_b2a_base64[this_ch]; 567 } 568 } 569 if ( leftbits == 2 ) { 570 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4]; 571 *ascii_data++ = BASE64_PAD; 572 *ascii_data++ = BASE64_PAD; 573 } else if ( leftbits == 4 ) { 574 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2]; 575 *ascii_data++ = BASE64_PAD; 576 } 577 if (newline) 578 *ascii_data++ = '\n'; /* Append a courtesy newline */ 579 580 return _PyBytesWriter_Finish(&writer, ascii_data); 581 } 582 583 /*[clinic input] 584 binascii.a2b_hqx 585 586 data: ascii_buffer 587 / 588 589 Decode .hqx coding. 590 [clinic start generated code]*/ 591 592 static PyObject * 593 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data) 594 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/ 595 { 596 const unsigned char *ascii_data; 597 unsigned char *bin_data; 598 int leftbits = 0; 599 unsigned char this_ch; 600 unsigned int leftchar = 0; 601 PyObject *res; 602 Py_ssize_t len; 603 int done = 0; 604 _PyBytesWriter writer; 605 606 ascii_data = data->buf; 607 len = data->len; 608 _PyBytesWriter_Init(&writer); 609 610 assert(len >= 0); 611 612 if (len > PY_SSIZE_T_MAX - 2) 613 return PyErr_NoMemory(); 614 615 /* Allocate a string that is too big (fixed later) 616 Add two to the initial length to prevent interning which 617 would preclude subsequent resizing. 
*/ 618 bin_data = _PyBytesWriter_Alloc(&writer, len + 2); 619 if (bin_data == NULL) 620 return NULL; 621 622 for( ; len > 0 ; len--, ascii_data++ ) { 623 /* Get the byte and look it up */ 624 this_ch = table_a2b_hqx[*ascii_data]; 625 if ( this_ch == SKIP ) 626 continue; 627 if ( this_ch == FAIL ) { 628 PyErr_SetString(Error, "Illegal char"); 629 _PyBytesWriter_Dealloc(&writer); 630 return NULL; 631 } 632 if ( this_ch == DONE ) { 633 /* The terminating colon */ 634 done = 1; 635 break; 636 } 637 638 /* Shift it into the buffer and see if any bytes are ready */ 639 leftchar = (leftchar << 6) | (this_ch); 640 leftbits += 6; 641 if ( leftbits >= 8 ) { 642 leftbits -= 8; 643 *bin_data++ = (leftchar >> leftbits) & 0xff; 644 leftchar &= ((1 << leftbits) - 1); 645 } 646 } 647 648 if ( leftbits && !done ) { 649 PyErr_SetString(Incomplete, 650 "String has incomplete number of bytes"); 651 _PyBytesWriter_Dealloc(&writer); 652 return NULL; 653 } 654 655 res = _PyBytesWriter_Finish(&writer, bin_data); 656 if (res == NULL) 657 return NULL; 658 return Py_BuildValue("Ni", res, done); 659 } 660 661 662 /*[clinic input] 663 binascii.rlecode_hqx 664 665 data: Py_buffer 666 / 667 668 Binhex RLE-code binary data. 
669 [clinic start generated code]*/ 670 671 static PyObject * 672 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data) 673 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/ 674 { 675 const unsigned char *in_data; 676 unsigned char *out_data; 677 unsigned char ch; 678 Py_ssize_t in, inend, len; 679 _PyBytesWriter writer; 680 681 _PyBytesWriter_Init(&writer); 682 in_data = data->buf; 683 len = data->len; 684 685 assert(len >= 0); 686 687 if (len > PY_SSIZE_T_MAX / 2 - 2) 688 return PyErr_NoMemory(); 689 690 /* Worst case: output is twice as big as input (fixed later) */ 691 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2); 692 if (out_data == NULL) 693 return NULL; 694 695 for( in=0; in<len; in++) { 696 ch = in_data[in]; 697 if ( ch == RUNCHAR ) { 698 /* RUNCHAR. Escape it. */ 699 *out_data++ = RUNCHAR; 700 *out_data++ = 0; 701 } else { 702 /* Check how many following are the same */ 703 for(inend=in+1; 704 inend<len && in_data[inend] == ch && 705 inend < in+255; 706 inend++) ; 707 if ( inend - in > 3 ) { 708 /* More than 3 in a row. Output RLE. */ 709 *out_data++ = ch; 710 *out_data++ = RUNCHAR; 711 *out_data++ = (unsigned char) (inend-in); 712 in = inend-1; 713 } else { 714 /* Less than 3. Output the byte itself */ 715 *out_data++ = ch; 716 } 717 } 718 } 719 720 return _PyBytesWriter_Finish(&writer, out_data); 721 } 722 723 724 /*[clinic input] 725 binascii.b2a_hqx 726 727 data: Py_buffer 728 / 729 730 Encode .hqx data. 
731 [clinic start generated code]*/ 732 733 static PyObject * 734 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data) 735 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/ 736 { 737 unsigned char *ascii_data; 738 const unsigned char *bin_data; 739 int leftbits = 0; 740 unsigned char this_ch; 741 unsigned int leftchar = 0; 742 Py_ssize_t len; 743 _PyBytesWriter writer; 744 745 bin_data = data->buf; 746 len = data->len; 747 _PyBytesWriter_Init(&writer); 748 749 assert(len >= 0); 750 751 if (len > PY_SSIZE_T_MAX / 2 - 2) 752 return PyErr_NoMemory(); 753 754 /* Allocate a buffer that is at least large enough */ 755 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2); 756 if (ascii_data == NULL) 757 return NULL; 758 759 for( ; len > 0 ; len--, bin_data++ ) { 760 /* Shift into our buffer, and output any 6bits ready */ 761 leftchar = (leftchar << 8) | *bin_data; 762 leftbits += 8; 763 while ( leftbits >= 6 ) { 764 this_ch = (leftchar >> (leftbits-6)) & 0x3f; 765 leftbits -= 6; 766 *ascii_data++ = table_b2a_hqx[this_ch]; 767 } 768 } 769 /* Output a possible runt byte */ 770 if ( leftbits ) { 771 leftchar <<= (6-leftbits); 772 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f]; 773 } 774 775 return _PyBytesWriter_Finish(&writer, ascii_data); 776 } 777 778 779 /*[clinic input] 780 binascii.rledecode_hqx 781 782 data: Py_buffer 783 / 784 785 Decode hexbin RLE-coded string. 
786 [clinic start generated code]*/ 787 788 static PyObject * 789 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data) 790 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/ 791 { 792 const unsigned char *in_data; 793 unsigned char *out_data; 794 unsigned char in_byte, in_repeat; 795 Py_ssize_t in_len; 796 _PyBytesWriter writer; 797 798 in_data = data->buf; 799 in_len = data->len; 800 _PyBytesWriter_Init(&writer); 801 802 assert(in_len >= 0); 803 804 /* Empty string is a special case */ 805 if ( in_len == 0 ) 806 return PyBytes_FromStringAndSize("", 0); 807 else if (in_len > PY_SSIZE_T_MAX / 2) 808 return PyErr_NoMemory(); 809 810 /* Allocate a buffer of reasonable size. Resized when needed */ 811 out_data = _PyBytesWriter_Alloc(&writer, in_len); 812 if (out_data == NULL) 813 return NULL; 814 815 /* Use overallocation */ 816 writer.overallocate = 1; 817 818 /* 819 ** We need two macros here to get/put bytes and handle 820 ** end-of-buffer for input and output strings. 821 */ 822 #define INBYTE(b) \ 823 do { \ 824 if ( --in_len < 0 ) { \ 825 PyErr_SetString(Incomplete, ""); \ 826 goto error; \ 827 } \ 828 b = *in_data++; \ 829 } while(0) 830 831 /* 832 ** Handle first byte separately (since we have to get angry 833 ** in case of an orphaned RLE code). 834 */ 835 INBYTE(in_byte); 836 837 if (in_byte == RUNCHAR) { 838 INBYTE(in_repeat); 839 /* only 1 byte will be written, but 2 bytes were preallocated: 840 subtract 1 byte to prevent overallocation */ 841 writer.min_size--; 842 843 if (in_repeat != 0) { 844 /* Note Error, not Incomplete (which is at the end 845 ** of the string only). This is a programmer error. 
846 */ 847 PyErr_SetString(Error, "Orphaned RLE code at start"); 848 goto error; 849 } 850 *out_data++ = RUNCHAR; 851 } else { 852 *out_data++ = in_byte; 853 } 854 855 while( in_len > 0 ) { 856 INBYTE(in_byte); 857 858 if (in_byte == RUNCHAR) { 859 INBYTE(in_repeat); 860 /* only 1 byte will be written, but 2 bytes were preallocated: 861 subtract 1 byte to prevent overallocation */ 862 writer.min_size--; 863 864 if ( in_repeat == 0 ) { 865 /* Just an escaped RUNCHAR value */ 866 *out_data++ = RUNCHAR; 867 } else { 868 /* Pick up value and output a sequence of it */ 869 in_byte = out_data[-1]; 870 871 /* enlarge the buffer if needed */ 872 if (in_repeat > 1) { 873 /* -1 because we already preallocated 1 byte */ 874 out_data = _PyBytesWriter_Prepare(&writer, out_data, 875 in_repeat - 1); 876 if (out_data == NULL) 877 goto error; 878 } 879 880 while ( --in_repeat > 0 ) 881 *out_data++ = in_byte; 882 } 883 } else { 884 /* Normal byte */ 885 *out_data++ = in_byte; 886 } 887 } 888 return _PyBytesWriter_Finish(&writer, out_data); 889 890 error: 891 _PyBytesWriter_Dealloc(&writer); 892 return NULL; 893 } 894 895 896 /*[clinic input] 897 binascii.crc_hqx -> unsigned_int 898 899 data: Py_buffer 900 crc: unsigned_int(bitwise=True) 901 / 902 903 Compute CRC-CCITT incrementally. 
904 [clinic start generated code]*/ 905 906 static unsigned int 907 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc) 908 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/ 909 { 910 const unsigned char *bin_data; 911 Py_ssize_t len; 912 913 crc &= 0xffff; 914 bin_data = data->buf; 915 len = data->len; 916 917 while(len-- > 0) { 918 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++]; 919 } 920 921 return crc; 922 } 923 924 #ifndef USE_ZLIB_CRC32 925 /* Crc - 32 BIT ANSI X3.66 CRC checksum files 926 Also known as: ISO 3307 927 **********************************************************************| 928 * *| 929 * Demonstration program to compute the 32-bit CRC used as the frame *| 930 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *| 931 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *| 932 * protocol). The 32-bit FCS was added via the Federal Register, *| 933 * 1 June 1982, p.23798. I presume but don't know for certain that *| 934 * this polynomial is or will be included in CCITT V.41, which *| 935 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *| 936 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *| 937 * errors by a factor of 10^-5 over 16-bit FCS. *| 938 * *| 939 **********************************************************************| 940 941 Copyright (C) 1986 Gary S. Brown. You may use this program, or 942 code or tables extracted from it, as desired without restriction. 943 944 First, the polynomial itself and its table of feedback terms. The 945 polynomial is 946 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 947 Note that we take it "backwards" and put the highest-order term in 948 the lowest-order bit. The X^32 term is "implied"; the LSB is the 949 X^31 term, etc. The X^0 term (usually shown as "+1") results in 950 the MSB being 1. 

    Note that the usual hardware shift register implementation, which
    is what we're using (we're merely optimizing it by doing eight-bit
    chunks at a time) shifts bits into the lowest-order term.  In our
    implementation, that means shifting towards the right.  Why do we
    do it this way?  Because the calculated CRC must be transmitted in
    order from highest-order term to lowest-order term.  UARTs transmit
    characters in order from LSB to MSB.  By storing the CRC this way,
    we hand it to the UART in the order low-byte to high-byte; the UART
    sends each low-bit to high-bit; and the result is transmission bit
    by bit from highest- to lowest-order term without requiring any bit
    shuffling on our part.  Reception works similarly.

    The feedback terms table consists of 256, 32-bit entries.  Notes:

     1. The table can be generated at runtime if desired; code to do so
        is shown later.  It might not be obvious, but the feedback
        terms simply represent the results of eight shift/xor opera-
        tions for all combinations of data and CRC register values.

     2. The CRC accumulation logic is the same for all CRC polynomials,
        be they sixteen or thirty-two bits wide.  You simply choose the
        appropriate table.  Alternatively, because the table can be
        generated at runtime, you can start by generating the table for
        the polynomial in question and use exactly the same "updcrc",
        if your application needn't simultaneously handle two CRC
        polynomials.  (Note, however, that XMODEM is strange.)

     3. For 16-bit CRCs, the table entries need be only 16 bits wide;
        of course, 32-bit entries work OK if the high 16 bits are zero.

     4. The values must be right-shifted by eight bits by the "updcrc"
        logic; the shift must be unsigned (bring in zeroes).  On some
        hardware you could probably optimize the shift in assembler by
        using byte-swap instructions.
********************************************************************/

/* CRC-32 feedback-term table used by the built-in crc32 implementation,
** which indexes it with (crc ^ byte) & 0xff.  Only compiled when
** USE_ZLIB_CRC32 is not defined.
*/
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
#endif /* USE_ZLIB_CRC32 */

/*[clinic input]
binascii.crc32 -> unsigned_int

    data: Py_buffer
    crc: unsigned_int(bitwise=True) = 0
    /

Compute CRC-32 incrementally.
1052 [clinic start generated code]*/ 1053 1054 static unsigned int 1055 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) 1056 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/ 1057 1058 #ifdef USE_ZLIB_CRC32 1059 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */ 1060 { 1061 const Byte *buf; 1062 Py_ssize_t len; 1063 int signed_val; 1064 1065 buf = (Byte*)data->buf; 1066 len = data->len; 1067 signed_val = crc32(crc, buf, len); 1068 return (unsigned int)signed_val & 0xffffffffU; 1069 } 1070 #else /* USE_ZLIB_CRC32 */ 1071 { /* By Jim Ahlstrom; All rights transferred to CNRI */ 1072 const unsigned char *bin_data; 1073 Py_ssize_t len; 1074 unsigned int result; 1075 1076 bin_data = data->buf; 1077 len = data->len; 1078 1079 crc = ~ crc; 1080 while (len-- > 0) { 1081 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8); 1082 /* Note: (crc >> 8) MUST zero fill on left */ 1083 } 1084 1085 result = (crc ^ 0xFFFFFFFF); 1086 return result & 0xffffffff; 1087 } 1088 #endif /* USE_ZLIB_CRC32 */ 1089 1090 /*[clinic input] 1091 binascii.b2a_hex 1092 1093 data: Py_buffer 1094 / 1095 1096 Hexadecimal representation of binary data. 1097 1098 The return value is a bytes object. This function is also 1099 available as "hexlify()". 1100 [clinic start generated code]*/ 1101 1102 static PyObject * 1103 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data) 1104 /*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/ 1105 { 1106 return _Py_strhex_bytes((const char *)data->buf, data->len); 1107 } 1108 1109 /*[clinic input] 1110 binascii.hexlify = binascii.b2a_hex 1111 1112 Hexadecimal representation of binary data. 1113 1114 The return value is a bytes object. 
1115 [clinic start generated code]*/ 1116 1117 static PyObject * 1118 binascii_hexlify_impl(PyObject *module, Py_buffer *data) 1119 /*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/ 1120 { 1121 return _Py_strhex_bytes((const char *)data->buf, data->len); 1122 } 1123 1124 static int 1125 to_int(int c) 1126 { 1127 if (Py_ISDIGIT(c)) 1128 return c - '0'; 1129 else { 1130 if (Py_ISUPPER(c)) 1131 c = Py_TOLOWER(c); 1132 if (c >= 'a' && c <= 'f') 1133 return c - 'a' + 10; 1134 } 1135 return -1; 1136 } 1137 1138 1139 /*[clinic input] 1140 binascii.a2b_hex 1141 1142 hexstr: ascii_buffer 1143 / 1144 1145 Binary data of hexadecimal representation. 1146 1147 hexstr must contain an even number of hex digits (upper or lower case). 1148 This function is also available as "unhexlify()". 1149 [clinic start generated code]*/ 1150 1151 static PyObject * 1152 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) 1153 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/ 1154 { 1155 const char* argbuf; 1156 Py_ssize_t arglen; 1157 PyObject *retval; 1158 char* retbuf; 1159 Py_ssize_t i, j; 1160 1161 argbuf = hexstr->buf; 1162 arglen = hexstr->len; 1163 1164 assert(arglen >= 0); 1165 1166 /* XXX What should we do about strings with an odd length? Should 1167 * we add an implicit leading zero, or a trailing zero? For now, 1168 * raise an exception. 
1169 */ 1170 if (arglen % 2) { 1171 PyErr_SetString(Error, "Odd-length string"); 1172 return NULL; 1173 } 1174 1175 retval = PyBytes_FromStringAndSize(NULL, (arglen/2)); 1176 if (!retval) 1177 return NULL; 1178 retbuf = PyBytes_AS_STRING(retval); 1179 1180 for (i=j=0; i < arglen; i += 2) { 1181 int top = to_int(Py_CHARMASK(argbuf[i])); 1182 int bot = to_int(Py_CHARMASK(argbuf[i+1])); 1183 if (top == -1 || bot == -1) { 1184 PyErr_SetString(Error, 1185 "Non-hexadecimal digit found"); 1186 goto finally; 1187 } 1188 retbuf[j++] = (top << 4) + bot; 1189 } 1190 return retval; 1191 1192 finally: 1193 Py_DECREF(retval); 1194 return NULL; 1195 } 1196 1197 /*[clinic input] 1198 binascii.unhexlify = binascii.a2b_hex 1199 1200 Binary data of hexadecimal representation. 1201 1202 hexstr must contain an even number of hex digits (upper or lower case). 1203 [clinic start generated code]*/ 1204 1205 static PyObject * 1206 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr) 1207 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/ 1208 { 1209 return binascii_a2b_hex_impl(module, hexstr); 1210 } 1211 1212 static const int table_hex[128] = { 1213 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1214 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1215 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1216 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, 1217 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1218 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1219 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1220 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 1221 }; 1222 1223 #define hexval(c) table_hex[(unsigned int)(c)] 1224 1225 #define MAXLINESIZE 76 1226 1227 1228 /*[clinic input] 1229 binascii.a2b_qp 1230 1231 data: ascii_buffer 1232 header: int(c_default="0") = False 1233 1234 Decode a string of qp-encoded data. 
1235 [clinic start generated code]*/ 1236 1237 static PyObject * 1238 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header) 1239 /*[clinic end generated code: output=e99f7846cfb9bc53 input=5187a0d3d8e54f3b]*/ 1240 { 1241 Py_ssize_t in, out; 1242 char ch; 1243 const unsigned char *ascii_data; 1244 unsigned char *odata; 1245 Py_ssize_t datalen = 0; 1246 PyObject *rv; 1247 1248 ascii_data = data->buf; 1249 datalen = data->len; 1250 1251 /* We allocate the output same size as input, this is overkill. 1252 * The previous implementation used calloc() so we'll zero out the 1253 * memory here too, since PyMem_Malloc() does not guarantee that. 1254 */ 1255 odata = (unsigned char *) PyMem_Malloc(datalen); 1256 if (odata == NULL) { 1257 PyErr_NoMemory(); 1258 return NULL; 1259 } 1260 memset(odata, 0, datalen); 1261 1262 in = out = 0; 1263 while (in < datalen) { 1264 if (ascii_data[in] == '=') { 1265 in++; 1266 if (in >= datalen) break; 1267 /* Soft line breaks */ 1268 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) { 1269 if (ascii_data[in] != '\n') { 1270 while (in < datalen && ascii_data[in] != '\n') in++; 1271 } 1272 if (in < datalen) in++; 1273 } 1274 else if (ascii_data[in] == '=') { 1275 /* broken case from broken python qp */ 1276 odata[out++] = '='; 1277 in++; 1278 } 1279 else if ((in + 1 < datalen) && 1280 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') || 1281 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') || 1282 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) && 1283 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') || 1284 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') || 1285 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) { 1286 /* hexval */ 1287 ch = hexval(ascii_data[in]) << 4; 1288 in++; 1289 ch |= hexval(ascii_data[in]); 1290 in++; 1291 odata[out++] = ch; 1292 } 1293 else { 1294 odata[out++] = '='; 1295 } 1296 } 1297 else if (header && ascii_data[in] == '_') { 1298 odata[out++] = ' '; 1299 in++; 1300 } 1301 
else { 1302 odata[out] = ascii_data[in]; 1303 in++; 1304 out++; 1305 } 1306 } 1307 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) { 1308 PyMem_Free(odata); 1309 return NULL; 1310 } 1311 PyMem_Free(odata); 1312 return rv; 1313 } 1314 1315 static int 1316 to_hex (unsigned char ch, unsigned char *s) 1317 { 1318 unsigned int uvalue = ch; 1319 1320 s[1] = "0123456789ABCDEF"[uvalue % 16]; 1321 uvalue = (uvalue / 16); 1322 s[0] = "0123456789ABCDEF"[uvalue % 16]; 1323 return 0; 1324 } 1325 1326 /* XXX: This is ridiculously complicated to be backward compatible 1327 * (mostly) with the quopri module. It doesn't re-create the quopri 1328 * module bug where text ending in CRLF has the CR encoded */ 1329 1330 /*[clinic input] 1331 binascii.b2a_qp 1332 1333 data: Py_buffer 1334 quotetabs: int(c_default="0") = False 1335 istext: int(c_default="1") = True 1336 header: int(c_default="0") = False 1337 1338 Encode a string using quoted-printable encoding. 1339 1340 On encoding, when istext is set, newlines are not encoded, and white 1341 space at end of lines is. When istext is not set, \r and \n (CR/LF) 1342 are both encoded. When quotetabs is set, space and tabs are encoded. 
1343 [clinic start generated code]*/ 1344 1345 static PyObject * 1346 binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs, 1347 int istext, int header) 1348 /*[clinic end generated code: output=e9884472ebb1a94c input=7f2a9aaa008e92b2]*/ 1349 { 1350 Py_ssize_t in, out; 1351 const unsigned char *databuf; 1352 unsigned char *odata; 1353 Py_ssize_t datalen = 0, odatalen = 0; 1354 PyObject *rv; 1355 unsigned int linelen = 0; 1356 unsigned char ch; 1357 int crlf = 0; 1358 const unsigned char *p; 1359 1360 databuf = data->buf; 1361 datalen = data->len; 1362 1363 /* See if this string is using CRLF line ends */ 1364 /* XXX: this function has the side effect of converting all of 1365 * the end of lines to be the same depending on this detection 1366 * here */ 1367 p = (const unsigned char *) memchr(databuf, '\n', datalen); 1368 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r')) 1369 crlf = 1; 1370 1371 /* First, scan to see how many characters need to be encoded */ 1372 in = 0; 1373 while (in < datalen) { 1374 Py_ssize_t delta = 0; 1375 if ((databuf[in] > 126) || 1376 (databuf[in] == '=') || 1377 (header && databuf[in] == '_') || 1378 ((databuf[in] == '.') && (linelen == 0) && 1379 (in + 1 == datalen || databuf[in+1] == '\n' || 1380 databuf[in+1] == '\r' || databuf[in+1] == 0)) || 1381 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) || 1382 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) || 1383 ((databuf[in] < 33) && 1384 (databuf[in] != '\r') && (databuf[in] != '\n') && 1385 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' '))))) 1386 { 1387 if ((linelen + 3) >= MAXLINESIZE) { 1388 linelen = 0; 1389 if (crlf) 1390 delta += 3; 1391 else 1392 delta += 2; 1393 } 1394 linelen += 3; 1395 delta += 3; 1396 in++; 1397 } 1398 else { 1399 if (istext && 1400 ((databuf[in] == '\n') || 1401 ((in+1 < datalen) && (databuf[in] == '\r') && 1402 (databuf[in+1] == '\n')))) 1403 { 1404 linelen = 0; 1405 /* Protect against 
whitespace on end of line */ 1406 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t'))) 1407 delta += 2; 1408 if (crlf) 1409 delta += 2; 1410 else 1411 delta += 1; 1412 if (databuf[in] == '\r') 1413 in += 2; 1414 else 1415 in++; 1416 } 1417 else { 1418 if ((in + 1 != datalen) && 1419 (databuf[in+1] != '\n') && 1420 (linelen + 1) >= MAXLINESIZE) { 1421 linelen = 0; 1422 if (crlf) 1423 delta += 3; 1424 else 1425 delta += 2; 1426 } 1427 linelen++; 1428 delta++; 1429 in++; 1430 } 1431 } 1432 if (PY_SSIZE_T_MAX - delta < odatalen) { 1433 PyErr_NoMemory(); 1434 return NULL; 1435 } 1436 odatalen += delta; 1437 } 1438 1439 /* We allocate the output same size as input, this is overkill. 1440 * The previous implementation used calloc() so we'll zero out the 1441 * memory here too, since PyMem_Malloc() does not guarantee that. 1442 */ 1443 odata = (unsigned char *) PyMem_Malloc(odatalen); 1444 if (odata == NULL) { 1445 PyErr_NoMemory(); 1446 return NULL; 1447 } 1448 memset(odata, 0, odatalen); 1449 1450 in = out = linelen = 0; 1451 while (in < datalen) { 1452 if ((databuf[in] > 126) || 1453 (databuf[in] == '=') || 1454 (header && databuf[in] == '_') || 1455 ((databuf[in] == '.') && (linelen == 0) && 1456 (in + 1 == datalen || databuf[in+1] == '\n' || 1457 databuf[in+1] == '\r' || databuf[in+1] == 0)) || 1458 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) || 1459 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) || 1460 ((databuf[in] < 33) && 1461 (databuf[in] != '\r') && (databuf[in] != '\n') && 1462 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' '))))) 1463 { 1464 if ((linelen + 3 )>= MAXLINESIZE) { 1465 odata[out++] = '='; 1466 if (crlf) odata[out++] = '\r'; 1467 odata[out++] = '\n'; 1468 linelen = 0; 1469 } 1470 odata[out++] = '='; 1471 to_hex(databuf[in], &odata[out]); 1472 out += 2; 1473 in++; 1474 linelen += 3; 1475 } 1476 else { 1477 if (istext && 1478 ((databuf[in] == '\n') || 1479 ((in+1 < datalen) && (databuf[in] 
== '\r') && 1480 (databuf[in+1] == '\n')))) 1481 { 1482 linelen = 0; 1483 /* Protect against whitespace on end of line */ 1484 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) { 1485 ch = odata[out-1]; 1486 odata[out-1] = '='; 1487 to_hex(ch, &odata[out]); 1488 out += 2; 1489 } 1490 1491 if (crlf) odata[out++] = '\r'; 1492 odata[out++] = '\n'; 1493 if (databuf[in] == '\r') 1494 in += 2; 1495 else 1496 in++; 1497 } 1498 else { 1499 if ((in + 1 != datalen) && 1500 (databuf[in+1] != '\n') && 1501 (linelen + 1) >= MAXLINESIZE) { 1502 odata[out++] = '='; 1503 if (crlf) odata[out++] = '\r'; 1504 odata[out++] = '\n'; 1505 linelen = 0; 1506 } 1507 linelen++; 1508 if (header && databuf[in] == ' ') { 1509 odata[out++] = '_'; 1510 in++; 1511 } 1512 else { 1513 odata[out++] = databuf[in++]; 1514 } 1515 } 1516 } 1517 } 1518 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) { 1519 PyMem_Free(odata); 1520 return NULL; 1521 } 1522 PyMem_Free(odata); 1523 return rv; 1524 } 1525 1526 /* List of functions defined in the module */ 1527 1528 static struct PyMethodDef binascii_module_methods[] = { 1529 BINASCII_A2B_UU_METHODDEF 1530 BINASCII_B2A_UU_METHODDEF 1531 BINASCII_A2B_BASE64_METHODDEF 1532 BINASCII_B2A_BASE64_METHODDEF 1533 BINASCII_A2B_HQX_METHODDEF 1534 BINASCII_B2A_HQX_METHODDEF 1535 BINASCII_A2B_HEX_METHODDEF 1536 BINASCII_B2A_HEX_METHODDEF 1537 BINASCII_HEXLIFY_METHODDEF 1538 BINASCII_UNHEXLIFY_METHODDEF 1539 BINASCII_RLECODE_HQX_METHODDEF 1540 BINASCII_RLEDECODE_HQX_METHODDEF 1541 BINASCII_CRC_HQX_METHODDEF 1542 BINASCII_CRC32_METHODDEF 1543 BINASCII_A2B_QP_METHODDEF 1544 BINASCII_B2A_QP_METHODDEF 1545 {NULL, NULL} /* sentinel */ 1546 }; 1547 1548 1549 /* Initialization function for the module (*must* be called PyInit_binascii) */ 1550 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII"); 1551 1552 1553 static struct PyModuleDef binasciimodule = { 1554 PyModuleDef_HEAD_INIT, 1555 "binascii", 1556 doc_binascii, 1557 -1, 1558 
binascii_module_methods, 1559 NULL, 1560 NULL, 1561 NULL, 1562 NULL 1563 }; 1564 1565 PyMODINIT_FUNC 1566 PyInit_binascii(void) 1567 { 1568 PyObject *m, *d; 1569 1570 /* Create the module and add the functions */ 1571 m = PyModule_Create(&binasciimodule); 1572 if (m == NULL) 1573 return NULL; 1574 1575 d = PyModule_GetDict(m); 1576 1577 Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL); 1578 PyDict_SetItemString(d, "Error", Error); 1579 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL); 1580 PyDict_SetItemString(d, "Incomplete", Incomplete); 1581 if (PyErr_Occurred()) { 1582 Py_DECREF(m); 1583 m = NULL; 1584 } 1585 return m; 1586 } 1587