1 /* bytes object implementation */ 2 3 #define PY_SSIZE_T_CLEAN 4 5 #include "Python.h" 6 7 #include "bytes_methods.h" 8 #include "pystrhex.h" 9 #include <stddef.h> 10 11 /*[clinic input] 12 class bytes "PyBytesObject *" "&PyBytes_Type" 13 [clinic start generated code]*/ 14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/ 15 16 #include "clinic/bytesobject.c.h" 17 18 #ifdef COUNT_ALLOCS 19 Py_ssize_t null_strings, one_strings; 20 #endif 21 22 static PyBytesObject *characters[UCHAR_MAX + 1]; 23 static PyBytesObject *nullstring; 24 25 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation 26 for a string of length n should request PyBytesObject_SIZE + n bytes. 27 28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves 29 3 bytes per string allocation on a typical system. 30 */ 31 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) 32 33 /* Forward declaration */ 34 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, 35 char *str); 36 37 /* 38 For PyBytes_FromString(), the parameter `str' points to a null-terminated 39 string containing exactly `size' bytes. 40 41 For PyBytes_FromStringAndSize(), the parameter `str' is 42 either NULL or else points to a string containing at least `size' bytes. 43 For PyBytes_FromStringAndSize(), the string in the `str' parameter does 44 not have to be null-terminated. (Therefore it is safe to construct a 45 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.) 46 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1' 47 bytes (setting the last byte to the null terminating character) and you can 48 fill in the data yourself. If `str' is non-NULL then the resulting 49 PyBytes object must be treated as immutable and you must not fill in nor 50 alter the data yourself, since the strings may be shared. 51 52 The PyObject member `op->ob_size', which denotes the number of "extra 53 items" in a variable-size object, will contain the number of bytes 54 allocated for string data, not counting the null terminating character. 55 It is therefore equal to the `size' parameter (for 56 PyBytes_FromStringAndSize()) or the length of the string in the `str' 57 parameter (for PyBytes_FromString()). 58 */ 59 static PyObject * 60 _PyBytes_FromSize(Py_ssize_t size, int use_calloc) 61 { 62 PyBytesObject *op; 63 assert(size >= 0); 64 65 if (size == 0 && (op = nullstring) != NULL) { 66 #ifdef COUNT_ALLOCS 67 null_strings++; 68 #endif 69 Py_INCREF(op); 70 return (PyObject *)op; 71 } 72 73 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) { 74 PyErr_SetString(PyExc_OverflowError, 75 "byte string is too large"); 76 return NULL; 77 } 78 79 /* Inline PyObject_NewVar */ 80 if (use_calloc) 81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size); 82 else 83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size); 84 if (op == NULL) 85 return PyErr_NoMemory(); 86 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); 87 op->ob_shash = -1; 88 if (!use_calloc) 89 op->ob_sval[size] = '\0'; 90 /* empty byte string singleton */ 91 if (size == 0) { 92 nullstring = op; 93 Py_INCREF(op); 94 } 95 return (PyObject *) op; 96 } 97 98 PyObject * 99 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) 100 { 101 PyBytesObject *op; 102 if (size < 0) { 103 PyErr_SetString(PyExc_SystemError, 104 "Negative size passed to PyBytes_FromStringAndSize"); 105 return NULL; 106 } 107 if (size == 1 && str != NULL && 108 (op = characters[*str & UCHAR_MAX]) != NULL) 109 { 110 #ifdef COUNT_ALLOCS 111 one_strings++; 112 #endif 113 Py_INCREF(op); 114 return (PyObject *)op; 115 } 116 117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0); 118 if (op == NULL) 119 return NULL; 120 if (str == NULL) 121 return (PyObject *) op; 122 123 memcpy(op->ob_sval, str, size); 124 /* share short strings */ 125 if (size == 1) { 126 characters[*str & UCHAR_MAX] = op; 127 Py_INCREF(op); 128 } 129 return (PyObject *) op; 130 } 131 132 PyObject * 133 PyBytes_FromString(const char *str) 134 { 135 size_t size; 136 PyBytesObject *op; 137 138 assert(str != NULL); 139 size = strlen(str); 140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) { 141 PyErr_SetString(PyExc_OverflowError, 142 "byte string is too long"); 143 return NULL; 144 } 145 if (size == 0 && (op = nullstring) != NULL) { 146 #ifdef COUNT_ALLOCS 147 null_strings++; 148 #endif 149 Py_INCREF(op); 150 return (PyObject *)op; 151 } 152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { 153 #ifdef COUNT_ALLOCS 154 one_strings++; 155 #endif 156 Py_INCREF(op); 157 return (PyObject *)op; 158 } 159 160 /* Inline PyObject_NewVar */ 161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size); 162 if (op == NULL) 163 return PyErr_NoMemory(); 164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); 165 op->ob_shash = -1; 166 memcpy(op->ob_sval, str, size+1); 167 /* share short strings */ 168 if (size == 0) { 169 nullstring = op; 170 Py_INCREF(op); 171 } else if (size == 1) { 172 characters[*str & UCHAR_MAX] = op; 173 Py_INCREF(op); 174 } 175 return (PyObject *) op; 176 } 177 178 PyObject * 179 PyBytes_FromFormatV(const char *format, va_list vargs) 180 { 181 char *s; 182 const char *f; 183 const char *p; 184 Py_ssize_t prec; 185 int longflag; 186 int size_tflag; 187 /* Longest 64-bit formatted numbers: 188 - "18446744073709551615\0" (21 bytes) 189 - "-9223372036854775808\0" (21 bytes) 190 Decimal takes the most space (it isn't enough for octal.) 191 192 Longest 64-bit pointer representation: 193 "0xffffffffffffffff\0" (19 bytes). */ 194 char buffer[21]; 195 _PyBytesWriter writer; 196 197 _PyBytesWriter_Init(&writer); 198 199 s = _PyBytesWriter_Alloc(&writer, strlen(format)); 200 if (s == NULL) 201 return NULL; 202 writer.overallocate = 1; 203 204 #define WRITE_BYTES(str) \ 205 do { \ 206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \ 207 if (s == NULL) \ 208 goto error; \ 209 } while (0) 210 211 for (f = format; *f; f++) { 212 if (*f != '%') { 213 *s++ = *f; 214 continue; 215 } 216 217 p = f++; 218 219 /* ignore the width (ex: 10 in "%10s") */ 220 while (Py_ISDIGIT(*f)) 221 f++; 222 223 /* parse the precision (ex: 10 in "%.10s") */ 224 prec = 0; 225 if (*f == '.') { 226 f++; 227 for (; Py_ISDIGIT(*f); f++) { 228 prec = (prec * 10) + (*f - '0'); 229 } 230 } 231 232 while (*f && *f != '%' && !Py_ISALPHA(*f)) 233 f++; 234 235 /* handle the long flag ('l'), but only for %ld and %lu. 236 others can be added when necessary. */ 237 longflag = 0; 238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { 239 longflag = 1; 240 ++f; 241 } 242 243 /* handle the size_t flag ('z'). */ 244 size_tflag = 0; 245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { 246 size_tflag = 1; 247 ++f; 248 } 249 250 /* subtract bytes preallocated for the format string 251 (ex: 2 for "%s") */ 252 writer.min_size -= (f - p + 1); 253 254 switch (*f) { 255 case 'c': 256 { 257 int c = va_arg(vargs, int); 258 if (c < 0 || c > 255) { 259 PyErr_SetString(PyExc_OverflowError, 260 "PyBytes_FromFormatV(): %c format " 261 "expects an integer in range [0; 255]"); 262 goto error; 263 } 264 writer.min_size++; 265 *s++ = (unsigned char)c; 266 break; 267 } 268 269 case 'd': 270 if (longflag) 271 sprintf(buffer, "%ld", va_arg(vargs, long)); 272 else if (size_tflag) 273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d", 274 va_arg(vargs, Py_ssize_t)); 275 else 276 sprintf(buffer, "%d", va_arg(vargs, int)); 277 assert(strlen(buffer) < sizeof(buffer)); 278 WRITE_BYTES(buffer); 279 break; 280 281 case 'u': 282 if (longflag) 283 sprintf(buffer, "%lu", 284 va_arg(vargs, unsigned long)); 285 else if (size_tflag) 286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u", 287 va_arg(vargs, size_t)); 288 else 289 sprintf(buffer, "%u", 290 va_arg(vargs, unsigned int)); 291 assert(strlen(buffer) < sizeof(buffer)); 292 WRITE_BYTES(buffer); 293 break; 294 295 case 'i': 296 sprintf(buffer, "%i", va_arg(vargs, int)); 297 assert(strlen(buffer) < sizeof(buffer)); 298 WRITE_BYTES(buffer); 299 break; 300 301 case 'x': 302 sprintf(buffer, "%x", va_arg(vargs, int)); 303 assert(strlen(buffer) < sizeof(buffer)); 304 WRITE_BYTES(buffer); 305 break; 306 307 case 's': 308 { 309 Py_ssize_t i; 310 311 p = va_arg(vargs, const char*); 312 i = strlen(p); 313 if (prec > 0 && i > prec) 314 i = prec; 315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i); 316 if (s == NULL) 317 goto error; 318 break; 319 } 320 321 case 'p': 322 sprintf(buffer, "%p", va_arg(vargs, void*)); 323 assert(strlen(buffer) < sizeof(buffer)); 324 /* %p is ill-defined: ensure leading 0x. */ 325 if (buffer[1] == 'X') 326 buffer[1] = 'x'; 327 else if (buffer[1] != 'x') { 328 memmove(buffer+2, buffer, strlen(buffer)+1); 329 buffer[0] = '0'; 330 buffer[1] = 'x'; 331 } 332 WRITE_BYTES(buffer); 333 break; 334 335 case '%': 336 writer.min_size++; 337 *s++ = '%'; 338 break; 339 340 default: 341 if (*f == 0) { 342 /* fix min_size if we reached the end of the format string */ 343 writer.min_size++; 344 } 345 346 /* invalid format string: copy unformatted string and exit */ 347 WRITE_BYTES(p); 348 return _PyBytesWriter_Finish(&writer, s); 349 } 350 } 351 352 #undef WRITE_BYTES 353 354 return _PyBytesWriter_Finish(&writer, s); 355 356 error: 357 _PyBytesWriter_Dealloc(&writer); 358 return NULL; 359 } 360 361 PyObject * 362 PyBytes_FromFormat(const char *format, ...) 363 { 364 PyObject* ret; 365 va_list vargs; 366 367 #ifdef HAVE_STDARG_PROTOTYPES 368 va_start(vargs, format); 369 #else 370 va_start(vargs); 371 #endif 372 ret = PyBytes_FromFormatV(format, vargs); 373 va_end(vargs); 374 return ret; 375 } 376 377 /* Helpers for formatstring */ 378 379 Py_LOCAL_INLINE(PyObject *) 380 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) 381 { 382 Py_ssize_t argidx = *p_argidx; 383 if (argidx < arglen) { 384 (*p_argidx)++; 385 if (arglen < 0) 386 return args; 387 else 388 return PyTuple_GetItem(args, argidx); 389 } 390 PyErr_SetString(PyExc_TypeError, 391 "not enough arguments for format string"); 392 return NULL; 393 } 394 395 /* Format codes 396 * F_LJUST '-' 397 * F_SIGN '+' 398 * F_BLANK ' ' 399 * F_ALT '#' 400 * F_ZERO '0' 401 */ 402 #define F_LJUST (1<<0) 403 #define F_SIGN (1<<1) 404 #define F_BLANK (1<<2) 405 #define F_ALT (1<<3) 406 #define F_ZERO (1<<4) 407 408 /* Returns a new reference to a PyBytes object, or NULL on failure. */ 409 410 static char* 411 formatfloat(PyObject *v, int flags, int prec, int type, 412 PyObject **p_result, _PyBytesWriter *writer, char *str) 413 { 414 char *p; 415 PyObject *result; 416 double x; 417 size_t len; 418 419 x = PyFloat_AsDouble(v); 420 if (x == -1.0 && PyErr_Occurred()) { 421 PyErr_Format(PyExc_TypeError, "float argument required, " 422 "not %.200s", Py_TYPE(v)->tp_name); 423 return NULL; 424 } 425 426 if (prec < 0) 427 prec = 6; 428 429 p = PyOS_double_to_string(x, type, prec, 430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); 431 432 if (p == NULL) 433 return NULL; 434 435 len = strlen(p); 436 if (writer != NULL) { 437 str = _PyBytesWriter_Prepare(writer, str, len); 438 if (str == NULL) 439 return NULL; 440 memcpy(str, p, len); 441 PyMem_Free(p); 442 str += len; 443 return str; 444 } 445 446 result = PyBytes_FromStringAndSize(p, len); 447 PyMem_Free(p); 448 *p_result = result; 449 return str; 450 } 451 452 static PyObject * 453 formatlong(PyObject *v, int flags, int prec, int type) 454 { 455 PyObject *result, *iobj; 456 if (type == 'i') 457 type = 'd'; 458 if (PyLong_Check(v)) 459 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type); 460 if (PyNumber_Check(v)) { 461 /* make sure number is a type of integer for o, x, and X */ 462 if (type == 'o' || type == 'x' || type == 'X') 463 iobj = PyNumber_Index(v); 464 else 465 iobj = PyNumber_Long(v); 466 if (iobj == NULL) { 467 if (!PyErr_ExceptionMatches(PyExc_TypeError)) 468 return NULL; 469 } 470 else if (!PyLong_Check(iobj)) 471 Py_CLEAR(iobj); 472 if (iobj != NULL) { 473 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type); 474 Py_DECREF(iobj); 475 return result; 476 } 477 } 478 PyErr_Format(PyExc_TypeError, 479 "%%%c format: %s is required, not %.200s", type, 480 (type == 'o' || type == 'x' || type == 'X') ? "an integer" 481 : "a number", 482 Py_TYPE(v)->tp_name); 483 return NULL; 484 } 485 486 static int 487 byte_converter(PyObject *arg, char *p) 488 { 489 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) { 490 *p = PyBytes_AS_STRING(arg)[0]; 491 return 1; 492 } 493 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) { 494 *p = PyByteArray_AS_STRING(arg)[0]; 495 return 1; 496 } 497 else { 498 PyObject *iobj; 499 long ival; 500 int overflow; 501 /* make sure number is a type of integer */ 502 if (PyLong_Check(arg)) { 503 ival = PyLong_AsLongAndOverflow(arg, &overflow); 504 } 505 else { 506 iobj = PyNumber_Index(arg); 507 if (iobj == NULL) { 508 if (!PyErr_ExceptionMatches(PyExc_TypeError)) 509 return 0; 510 goto onError; 511 } 512 ival = PyLong_AsLongAndOverflow(iobj, &overflow); 513 Py_DECREF(iobj); 514 } 515 if (!overflow && ival == -1 && PyErr_Occurred()) 516 goto onError; 517 if (overflow || !(0 <= ival && ival <= 255)) { 518 PyErr_SetString(PyExc_OverflowError, 519 "%c arg not in range(256)"); 520 return 0; 521 } 522 *p = (char)ival; 523 return 1; 524 } 525 onError: 526 PyErr_SetString(PyExc_TypeError, 527 "%c requires an integer in range(256) or a single byte"); 528 return 0; 529 } 530 531 static PyObject * 532 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen) 533 { 534 PyObject *func, *result; 535 _Py_IDENTIFIER(__bytes__); 536 /* is it a bytes object? */ 537 if (PyBytes_Check(v)) { 538 *pbuf = PyBytes_AS_STRING(v); 539 *plen = PyBytes_GET_SIZE(v); 540 Py_INCREF(v); 541 return v; 542 } 543 if (PyByteArray_Check(v)) { 544 *pbuf = PyByteArray_AS_STRING(v); 545 *plen = PyByteArray_GET_SIZE(v); 546 Py_INCREF(v); 547 return v; 548 } 549 /* does it support __bytes__? */ 550 func = _PyObject_LookupSpecial(v, &PyId___bytes__); 551 if (func != NULL) { 552 result = PyObject_CallFunctionObjArgs(func, NULL); 553 Py_DECREF(func); 554 if (result == NULL) 555 return NULL; 556 if (!PyBytes_Check(result)) { 557 PyErr_Format(PyExc_TypeError, 558 "__bytes__ returned non-bytes (type %.200s)", 559 Py_TYPE(result)->tp_name); 560 Py_DECREF(result); 561 return NULL; 562 } 563 *pbuf = PyBytes_AS_STRING(result); 564 *plen = PyBytes_GET_SIZE(result); 565 return result; 566 } 567 PyErr_Format(PyExc_TypeError, 568 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'", 569 Py_TYPE(v)->tp_name); 570 return NULL; 571 } 572 573 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */ 574 575 PyObject * 576 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, 577 PyObject *args, int use_bytearray) 578 { 579 const char *fmt; 580 char *res; 581 Py_ssize_t arglen, argidx; 582 Py_ssize_t fmtcnt; 583 int args_owned = 0; 584 PyObject *dict = NULL; 585 _PyBytesWriter writer; 586 587 if (args == NULL) { 588 PyErr_BadInternalCall(); 589 return NULL; 590 } 591 fmt = format; 592 fmtcnt = format_len; 593 594 _PyBytesWriter_Init(&writer); 595 writer.use_bytearray = use_bytearray; 596 597 res = _PyBytesWriter_Alloc(&writer, fmtcnt); 598 if (res == NULL) 599 return NULL; 600 if (!use_bytearray) 601 writer.overallocate = 1; 602 603 if (PyTuple_Check(args)) { 604 arglen = PyTuple_GET_SIZE(args); 605 argidx = 0; 606 } 607 else { 608 arglen = -1; 609 argidx = -2; 610 } 611 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && 612 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) && 613 !PyByteArray_Check(args)) { 614 dict = args; 615 } 616 617 while (--fmtcnt >= 0) { 618 if (*fmt != '%') { 619 Py_ssize_t len; 620 char *pos; 621 622 pos = strchr(fmt + 1, '%'); 623 if (pos != NULL) 624 len = pos - fmt; 625 else 626 len = format_len - (fmt - format); 627 assert(len != 0); 628 629 memcpy(res, fmt, len); 630 res += len; 631 fmt += len; 632 fmtcnt -= (len - 1); 633 } 634 else { 635 /* Got a format specifier */ 636 int flags = 0; 637 Py_ssize_t width = -1; 638 int prec = -1; 639 int c = '\0'; 640 int fill; 641 PyObject *v = NULL; 642 PyObject *temp = NULL; 643 const char *pbuf = NULL; 644 int sign; 645 Py_ssize_t len = 0; 646 char onechar; /* For byte_converter() */ 647 Py_ssize_t alloc; 648 #ifdef Py_DEBUG 649 char *before; 650 #endif 651 652 fmt++; 653 if (*fmt == '(') { 654 const char *keystart; 655 Py_ssize_t keylen; 656 PyObject *key; 657 int pcount = 1; 658 659 if (dict == NULL) { 660 PyErr_SetString(PyExc_TypeError, 661 "format requires a mapping"); 662 goto error; 663 } 664 ++fmt; 665 --fmtcnt; 666 keystart = fmt; 667 /* Skip over balanced parentheses */ 668 while (pcount > 0 && --fmtcnt >= 0) { 669 if (*fmt == ')') 670 --pcount; 671 else if (*fmt == '(') 672 ++pcount; 673 fmt++; 674 } 675 keylen = fmt - keystart - 1; 676 if (fmtcnt < 0 || pcount > 0) { 677 PyErr_SetString(PyExc_ValueError, 678 "incomplete format key"); 679 goto error; 680 } 681 key = PyBytes_FromStringAndSize(keystart, 682 keylen); 683 if (key == NULL) 684 goto error; 685 if (args_owned) { 686 Py_DECREF(args); 687 args_owned = 0; 688 } 689 args = PyObject_GetItem(dict, key); 690 Py_DECREF(key); 691 if (args == NULL) { 692 goto error; 693 } 694 args_owned = 1; 695 arglen = -1; 696 argidx = -2; 697 } 698 699 /* Parse flags. Example: "%+i" => flags=F_SIGN. */ 700 while (--fmtcnt >= 0) { 701 switch (c = *fmt++) { 702 case '-': flags |= F_LJUST; continue; 703 case '+': flags |= F_SIGN; continue; 704 case ' ': flags |= F_BLANK; continue; 705 case '#': flags |= F_ALT; continue; 706 case '0': flags |= F_ZERO; continue; 707 } 708 break; 709 } 710 711 /* Parse width. Example: "%10s" => width=10 */ 712 if (c == '*') { 713 v = getnextarg(args, arglen, &argidx); 714 if (v == NULL) 715 goto error; 716 if (!PyLong_Check(v)) { 717 PyErr_SetString(PyExc_TypeError, 718 "* wants int"); 719 goto error; 720 } 721 width = PyLong_AsSsize_t(v); 722 if (width == -1 && PyErr_Occurred()) 723 goto error; 724 if (width < 0) { 725 flags |= F_LJUST; 726 width = -width; 727 } 728 if (--fmtcnt >= 0) 729 c = *fmt++; 730 } 731 else if (c >= 0 && isdigit(c)) { 732 width = c - '0'; 733 while (--fmtcnt >= 0) { 734 c = Py_CHARMASK(*fmt++); 735 if (!isdigit(c)) 736 break; 737 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { 738 PyErr_SetString( 739 PyExc_ValueError, 740 "width too big"); 741 goto error; 742 } 743 width = width*10 + (c - '0'); 744 } 745 } 746 747 /* Parse precision. Example: "%.3f" => prec=3 */ 748 if (c == '.') { 749 prec = 0; 750 if (--fmtcnt >= 0) 751 c = *fmt++; 752 if (c == '*') { 753 v = getnextarg(args, arglen, &argidx); 754 if (v == NULL) 755 goto error; 756 if (!PyLong_Check(v)) { 757 PyErr_SetString( 758 PyExc_TypeError, 759 "* wants int"); 760 goto error; 761 } 762 prec = _PyLong_AsInt(v); 763 if (prec == -1 && PyErr_Occurred()) 764 goto error; 765 if (prec < 0) 766 prec = 0; 767 if (--fmtcnt >= 0) 768 c = *fmt++; 769 } 770 else if (c >= 0 && isdigit(c)) { 771 prec = c - '0'; 772 while (--fmtcnt >= 0) { 773 c = Py_CHARMASK(*fmt++); 774 if (!isdigit(c)) 775 break; 776 if (prec > (INT_MAX - ((int)c - '0')) / 10) { 777 PyErr_SetString( 778 PyExc_ValueError, 779 "prec too big"); 780 goto error; 781 } 782 prec = prec*10 + (c - '0'); 783 } 784 } 785 } /* prec */ 786 if (fmtcnt >= 0) { 787 if (c == 'h' || c == 'l' || c == 'L') { 788 if (--fmtcnt >= 0) 789 c = *fmt++; 790 } 791 } 792 if (fmtcnt < 0) { 793 PyErr_SetString(PyExc_ValueError, 794 "incomplete format"); 795 goto error; 796 } 797 if (c != '%') { 798 v = getnextarg(args, arglen, &argidx); 799 if (v == NULL) 800 goto error; 801 } 802 803 if (fmtcnt < 0) { 804 /* last writer: disable writer overallocation */ 805 writer.overallocate = 0; 806 } 807 808 sign = 0; 809 fill = ' '; 810 switch (c) { 811 case '%': 812 *res++ = '%'; 813 continue; 814 815 case 'r': 816 // %r is only for 2/3 code; 3 only code should use %a 817 case 'a': 818 temp = PyObject_ASCII(v); 819 if (temp == NULL) 820 goto error; 821 assert(PyUnicode_IS_ASCII(temp)); 822 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp); 823 len = PyUnicode_GET_LENGTH(temp); 824 if (prec >= 0 && len > prec) 825 len = prec; 826 break; 827 828 case 's': 829 // %s is only for 2/3 code; 3 only code should use %b 830 case 'b': 831 temp = format_obj(v, &pbuf, &len); 832 if (temp == NULL) 833 goto error; 834 if (prec >= 0 && len > prec) 835 len = prec; 836 break; 837 838 case 'i': 839 case 'd': 840 case 'u': 841 case 'o': 842 case 'x': 843 case 'X': 844 if (PyLong_CheckExact(v) 845 && width == -1 && prec == -1 846 && !(flags & (F_SIGN | F_BLANK)) 847 && c != 'X') 848 { 849 /* Fast path */ 850 int alternate = flags & F_ALT; 851 int base; 852 853 switch(c) 854 { 855 default: 856 assert(0 && "'type' not in [diuoxX]"); 857 case 'd': 858 case 'i': 859 case 'u': 860 base = 10; 861 break; 862 case 'o': 863 base = 8; 864 break; 865 case 'x': 866 case 'X': 867 base = 16; 868 break; 869 } 870 871 /* Fast path */ 872 writer.min_size -= 2; /* size preallocated for "%d" */ 873 res = _PyLong_FormatBytesWriter(&writer, res, 874 v, base, alternate); 875 if (res == NULL) 876 goto error; 877 continue; 878 } 879 880 temp = formatlong(v, flags, prec, c); 881 if (!temp) 882 goto error; 883 assert(PyUnicode_IS_ASCII(temp)); 884 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp); 885 len = PyUnicode_GET_LENGTH(temp); 886 sign = 1; 887 if (flags & F_ZERO) 888 fill = '0'; 889 break; 890 891 case 'e': 892 case 'E': 893 case 'f': 894 case 'F': 895 case 'g': 896 case 'G': 897 if (width == -1 && prec == -1 898 && !(flags & (F_SIGN | F_BLANK))) 899 { 900 /* Fast path */ 901 writer.min_size -= 2; /* size preallocated for "%f" */ 902 res = formatfloat(v, flags, prec, c, NULL, &writer, res); 903 if (res == NULL) 904 goto error; 905 continue; 906 } 907 908 if (!formatfloat(v, flags, prec, c, &temp, NULL, res)) 909 goto error; 910 pbuf = PyBytes_AS_STRING(temp); 911 len = PyBytes_GET_SIZE(temp); 912 sign = 1; 913 if (flags & F_ZERO) 914 fill = '0'; 915 break; 916 917 case 'c': 918 pbuf = &onechar; 919 len = byte_converter(v, &onechar); 920 if (!len) 921 goto error; 922 if (width == -1) { 923 /* Fast path */ 924 *res++ = onechar; 925 continue; 926 } 927 break; 928 929 default: 930 PyErr_Format(PyExc_ValueError, 931 "unsupported format character '%c' (0x%x) " 932 "at index %zd", 933 c, c, 934 (Py_ssize_t)(fmt - 1 - format)); 935 goto error; 936 } 937 938 if (sign) { 939 if (*pbuf == '-' || *pbuf == '+') { 940 sign = *pbuf++; 941 len--; 942 } 943 else if (flags & F_SIGN) 944 sign = '+'; 945 else if (flags & F_BLANK) 946 sign = ' '; 947 else 948 sign = 0; 949 } 950 if (width < len) 951 width = len; 952 953 alloc = width; 954 if (sign != 0 && len == width) 955 alloc++; 956 /* 2: size preallocated for %s */ 957 if (alloc > 2) { 958 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2); 959 if (res == NULL) 960 goto error; 961 } 962 #ifdef Py_DEBUG 963 before = res; 964 #endif 965 966 /* Write the sign if needed */ 967 if (sign) { 968 if (fill != ' ') 969 *res++ = sign; 970 if (width > len) 971 width--; 972 } 973 974 /* Write the numeric prefix for "x", "X" and "o" formats 975 if the alternate form is used. 976 For example, write "0x" for the "%#x" format. */ 977 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) { 978 assert(pbuf[0] == '0'); 979 assert(pbuf[1] == c); 980 if (fill != ' ') { 981 *res++ = *pbuf++; 982 *res++ = *pbuf++; 983 } 984 width -= 2; 985 if (width < 0) 986 width = 0; 987 len -= 2; 988 } 989 990 /* Pad left with the fill character if needed */ 991 if (width > len && !(flags & F_LJUST)) { 992 memset(res, fill, width - len); 993 res += (width - len); 994 width = len; 995 } 996 997 /* If padding with spaces: write sign if needed and/or numeric 998 prefix if the alternate form is used */ 999 if (fill == ' ') { 1000 if (sign) 1001 *res++ = sign; 1002 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) { 1003 assert(pbuf[0] == '0'); 1004 assert(pbuf[1] == c); 1005 *res++ = *pbuf++; 1006 *res++ = *pbuf++; 1007 } 1008 } 1009 1010 /* Copy bytes */ 1011 memcpy(res, pbuf, len); 1012 res += len; 1013 1014 /* Pad right with the fill character if needed */ 1015 if (width > len) { 1016 memset(res, ' ', width - len); 1017 res += (width - len); 1018 } 1019 1020 if (dict && (argidx < arglen) && c != '%') { 1021 PyErr_SetString(PyExc_TypeError, 1022 "not all arguments converted during bytes formatting"); 1023 Py_XDECREF(temp); 1024 goto error; 1025 } 1026 Py_XDECREF(temp); 1027 1028 #ifdef Py_DEBUG 1029 /* check that we computed the exact size for this write */ 1030 assert((res - before) == alloc); 1031 #endif 1032 } /* '%' */ 1033 1034 /* If overallocation was disabled, ensure that it was the last 1035 write. Otherwise, we missed an optimization */ 1036 assert(writer.overallocate || fmtcnt < 0 || use_bytearray); 1037 } /* until end */ 1038 1039 if (argidx < arglen && !dict) { 1040 PyErr_SetString(PyExc_TypeError, 1041 "not all arguments converted during bytes formatting"); 1042 goto error; 1043 } 1044 1045 if (args_owned) { 1046 Py_DECREF(args); 1047 } 1048 return _PyBytesWriter_Finish(&writer, res); 1049 1050 error: 1051 _PyBytesWriter_Dealloc(&writer); 1052 if (args_owned) { 1053 Py_DECREF(args); 1054 } 1055 return NULL; 1056 } 1057 1058 /* =-= */ 1059 1060 static void 1061 bytes_dealloc(PyObject *op) 1062 { 1063 Py_TYPE(op)->tp_free(op); 1064 } 1065 1066 /* Unescape a backslash-escaped string. If unicode is non-zero, 1067 the string is a u-literal. If recode_encoding is non-zero, 1068 the string is UTF-8 encoded and should be re-encoded in the 1069 specified encoding. */ 1070 1071 static char * 1072 _PyBytes_DecodeEscapeRecode(const char **s, const char *end, 1073 const char *errors, const char *recode_encoding, 1074 _PyBytesWriter *writer, char *p) 1075 { 1076 PyObject *u, *w; 1077 const char* t; 1078 1079 t = *s; 1080 /* Decode non-ASCII bytes as UTF-8. */ 1081 while (t < end && (*t & 0x80)) 1082 t++; 1083 u = PyUnicode_DecodeUTF8(*s, t - *s, errors); 1084 if (u == NULL) 1085 return NULL; 1086 1087 /* Recode them in target encoding. */ 1088 w = PyUnicode_AsEncodedString(u, recode_encoding, errors); 1089 Py_DECREF(u); 1090 if (w == NULL) 1091 return NULL; 1092 assert(PyBytes_Check(w)); 1093 1094 /* Append bytes to output buffer. */ 1095 writer->min_size--; /* subtract 1 preallocated byte */ 1096 p = _PyBytesWriter_WriteBytes(writer, p, 1097 PyBytes_AS_STRING(w), 1098 PyBytes_GET_SIZE(w)); 1099 Py_DECREF(w); 1100 if (p == NULL) 1101 return NULL; 1102 1103 *s = t; 1104 return p; 1105 } 1106 1107 PyObject *_PyBytes_DecodeEscape(const char *s, 1108 Py_ssize_t len, 1109 const char *errors, 1110 Py_ssize_t unicode, 1111 const char *recode_encoding, 1112 const char **first_invalid_escape) 1113 { 1114 int c; 1115 char *p; 1116 const char *end; 1117 _PyBytesWriter writer; 1118 1119 _PyBytesWriter_Init(&writer); 1120 1121 p = _PyBytesWriter_Alloc(&writer, len); 1122 if (p == NULL) 1123 return NULL; 1124 writer.overallocate = 1; 1125 1126 *first_invalid_escape = NULL; 1127 1128 end = s + len; 1129 while (s < end) { 1130 if (*s != '\\') { 1131 non_esc: 1132 if (!(recode_encoding && (*s & 0x80))) { 1133 *p++ = *s++; 1134 } 1135 else { 1136 /* non-ASCII character and need to recode */ 1137 p = _PyBytes_DecodeEscapeRecode(&s, end, 1138 errors, recode_encoding, 1139 &writer, p); 1140 if (p == NULL) 1141 goto failed; 1142 } 1143 continue; 1144 } 1145 1146 s++; 1147 if (s == end) { 1148 PyErr_SetString(PyExc_ValueError, 1149 "Trailing \\ in string"); 1150 goto failed; 1151 } 1152 1153 switch (*s++) { 1154 /* XXX This assumes ASCII! */ 1155 case '\n': break; 1156 case '\\': *p++ = '\\'; break; 1157 case '\'': *p++ = '\''; break; 1158 case '\"': *p++ = '\"'; break; 1159 case 'b': *p++ = '\b'; break; 1160 case 'f': *p++ = '\014'; break; /* FF */ 1161 case 't': *p++ = '\t'; break; 1162 case 'n': *p++ = '\n'; break; 1163 case 'r': *p++ = '\r'; break; 1164 case 'v': *p++ = '\013'; break; /* VT */ 1165 case 'a': *p++ = '\007'; break; /* BEL, not classic C */ 1166 case '0': case '1': case '2': case '3': 1167 case '4': case '5': case '6': case '7': 1168 c = s[-1] - '0'; 1169 if (s < end && '0' <= *s && *s <= '7') { 1170 c = (c<<3) + *s++ - '0'; 1171 if (s < end && '0' <= *s && *s <= '7') 1172 c = (c<<3) + *s++ - '0'; 1173 } 1174 *p++ = c; 1175 break; 1176 case 'x': 1177 if (s+1 < end) { 1178 int digit1, digit2; 1179 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])]; 1180 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])]; 1181 if (digit1 < 16 && digit2 < 16) { 1182 *p++ = (unsigned char)((digit1 << 4) + digit2); 1183 s += 2; 1184 break; 1185 } 1186 } 1187 /* invalid hexadecimal digits */ 1188 1189 if (!errors || strcmp(errors, "strict") == 0) { 1190 PyErr_Format(PyExc_ValueError, 1191 "invalid \\x escape at position %d", 1192 s - 2 - (end - len)); 1193 goto failed; 1194 } 1195 if (strcmp(errors, "replace") == 0) { 1196 *p++ = '?'; 1197 } else if (strcmp(errors, "ignore") == 0) 1198 /* do nothing */; 1199 else { 1200 PyErr_Format(PyExc_ValueError, 1201 "decoding error; unknown " 1202 "error handling code: %.400s", 1203 errors); 1204 goto failed; 1205 } 1206 /* skip \x */ 1207 if (s < end && Py_ISXDIGIT(s[0])) 1208 s++; /* and a hexdigit */ 1209 break; 1210 1211 default: 1212 if (*first_invalid_escape == NULL) { 1213 *first_invalid_escape = s-1; /* Back up one char, since we've 1214 already incremented s. */ 1215 } 1216 *p++ = '\\'; 1217 s--; 1218 goto non_esc; /* an arbitrary number of unescaped 1219 UTF-8 bytes may follow. */ 1220 } 1221 } 1222 1223 return _PyBytesWriter_Finish(&writer, p); 1224 1225 failed: 1226 _PyBytesWriter_Dealloc(&writer); 1227 return NULL; 1228 } 1229 1230 PyObject *PyBytes_DecodeEscape(const char *s, 1231 Py_ssize_t len, 1232 const char *errors, 1233 Py_ssize_t unicode, 1234 const char *recode_encoding) 1235 { 1236 const char* first_invalid_escape; 1237 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode, 1238 recode_encoding, 1239 &first_invalid_escape); 1240 if (result == NULL) 1241 return NULL; 1242 if (first_invalid_escape != NULL) { 1243 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, 1244 "invalid escape sequence '\\%c'", 1245 *first_invalid_escape) < 0) { 1246 Py_DECREF(result); 1247 return NULL; 1248 } 1249 } 1250 return result; 1251 1252 } 1253 /* -------------------------------------------------------------------- */ 1254 /* object api */ 1255 1256 Py_ssize_t 1257 PyBytes_Size(PyObject *op) 1258 { 1259 if (!PyBytes_Check(op)) { 1260 PyErr_Format(PyExc_TypeError, 1261 "expected bytes, %.200s found", Py_TYPE(op)->tp_name); 1262 return -1; 1263 } 1264 return Py_SIZE(op); 1265 } 1266 1267 char * 1268 PyBytes_AsString(PyObject *op) 1269 { 1270 if (!PyBytes_Check(op)) { 1271 PyErr_Format(PyExc_TypeError, 1272 "expected bytes, %.200s found", Py_TYPE(op)->tp_name); 1273 return NULL; 1274 } 1275 return ((PyBytesObject *)op)->ob_sval; 1276 } 1277 1278 int 1279 PyBytes_AsStringAndSize(PyObject *obj, 1280 char **s, 1281 Py_ssize_t *len) 1282 { 1283 if (s == NULL) { 1284 PyErr_BadInternalCall(); 1285 return -1; 1286 } 1287 1288 if (!PyBytes_Check(obj)) { 1289 PyErr_Format(PyExc_TypeError, 1290 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name); 1291 return -1; 1292 } 1293 1294 *s = PyBytes_AS_STRING(obj); 1295 if (len != NULL) 1296 *len = PyBytes_GET_SIZE(obj); 1297 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) { 1298 PyErr_SetString(PyExc_ValueError, 1299 "embedded null byte"); 1300 return -1; 1301 } 1302 return 0; 1303 } 1304 1305 /* -------------------------------------------------------------------- */ 1306 /* Methods */ 1307 1308 #include "stringlib/stringdefs.h" 1309 1310 #include "stringlib/fastsearch.h" 1311 #include "stringlib/count.h" 1312 #include "stringlib/find.h" 1313 #include "stringlib/join.h" 1314 #include "stringlib/partition.h" 1315 #include "stringlib/split.h" 1316 #include "stringlib/ctype.h" 1317 1318 #include "stringlib/transmogrify.h" 1319 1320 PyObject * 1321 PyBytes_Repr(PyObject *obj, int smartquotes) 1322 { 1323 PyBytesObject* op = (PyBytesObject*) obj; 1324 Py_ssize_t i, length = Py_SIZE(op); 1325 Py_ssize_t newsize, squotes, dquotes; 1326 PyObject *v; 1327 unsigned char quote, *s, *p; 1328 1329 /* Compute size of output string */ 1330 squotes = dquotes = 0; 1331 newsize = 3; /* b'' */ 1332 s = (unsigned char*)op->ob_sval; 1333 for (i = 0; i < length; i++) { 1334 Py_ssize_t incr = 1; 1335 switch(s[i]) { 1336 case '\'': squotes++; break; 1337 case '"': dquotes++; break; 1338 case '\\': case '\t': case '\n': case '\r': 1339 incr = 2; break; /* \C */ 1340 default: 1341 if (s[i] < ' ' || s[i] >= 0x7f) 1342 incr = 4; /* \xHH */ 1343 } 1344 if (newsize > PY_SSIZE_T_MAX - incr) 1345 goto overflow; 1346 newsize += incr; 1347 } 1348 quote = '\''; 1349 if (smartquotes && squotes && !dquotes) 1350 quote = '"'; 1351 if (squotes && quote == '\'') { 1352 if (newsize > PY_SSIZE_T_MAX - squotes) 1353 goto overflow; 1354 newsize += squotes; 1355 } 1356 1357 v = PyUnicode_New(newsize, 127); 1358 if (v == NULL) { 1359 return NULL; 1360 } 1361 p = PyUnicode_1BYTE_DATA(v); 1362 1363 *p++ = 'b', *p++ = quote; 1364 for (i = 0; i < length; i++) { 1365 unsigned char c = op->ob_sval[i]; 1366 if (c == quote || c == '\\') 1367 *p++ = '\\', *p++ = c; 1368 else if (c == '\t') 1369 *p++ = '\\', *p++ = 't'; 1370 else if (c == '\n') 1371 *p++ = '\\', *p++ = 'n'; 1372 else if (c == '\r') 1373 *p++ = '\\', *p++ = 'r'; 1374 else if (c < ' ' || c >= 0x7f) { 1375 *p++ = '\\'; 1376 *p++ = 'x'; 1377 *p++ = Py_hexdigits[(c & 0xf0) >> 4]; 1378 *p++ = Py_hexdigits[c & 0xf]; 1379 } 1380 else 1381 *p++ = c; 1382 } 1383 *p++ = quote; 1384 assert(_PyUnicode_CheckConsistency(v, 1)); 1385 return v; 1386 1387 overflow: 1388 PyErr_SetString(PyExc_OverflowError, 1389 "bytes object is too large to make repr"); 1390 return NULL; 1391 } 1392 1393 static PyObject * 1394 bytes_repr(PyObject *op) 1395 { 1396 return PyBytes_Repr(op, 1); 1397 } 1398 1399 static PyObject * 1400 bytes_str(PyObject *op) 1401 { 1402 if (Py_BytesWarningFlag) { 1403 if (PyErr_WarnEx(PyExc_BytesWarning, 1404 "str() on a bytes instance", 1)) 1405 return NULL; 1406 } 1407 return bytes_repr(op); 1408 } 1409 1410 static Py_ssize_t 1411 bytes_length(PyBytesObject *a) 1412 { 1413 return Py_SIZE(a); 1414 } 1415 1416 /* This is also used by PyBytes_Concat() */ 1417 static PyObject * 1418 bytes_concat(PyObject *a, PyObject *b) 1419 { 1420 Py_buffer va, vb; 1421 PyObject *result = NULL; 1422 1423 va.len = -1; 1424 vb.len = -1; 1425 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 || 1426 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) { 1427 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", 1428 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name); 1429 goto done; 1430 } 1431 1432 /* Optimize end cases */ 1433 if (va.len == 0 && PyBytes_CheckExact(b)) { 1434 result = b; 1435 Py_INCREF(result); 1436 goto done; 1437 } 1438 if (vb.len == 0 && PyBytes_CheckExact(a)) { 1439 result = a; 1440 Py_INCREF(result); 1441 goto done; 1442 } 1443 1444 if (va.len > PY_SSIZE_T_MAX - vb.len) { 1445 PyErr_NoMemory(); 1446 goto done; 1447 } 1448 1449 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len); 1450 if (result != NULL) { 1451 memcpy(PyBytes_AS_STRING(result), va.buf, va.len); 1452 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len); 1453 } 1454 1455 done: 1456 if (va.len != -1) 1457 PyBuffer_Release(&va); 1458 if (vb.len != -1) 1459 PyBuffer_Release(&vb); 1460 return result; 1461 } 1462 1463 static PyObject * 1464 bytes_repeat(PyBytesObject *a, Py_ssize_t n) 1465 { 1466 Py_ssize_t i; 1467 Py_ssize_t j; 1468 Py_ssize_t size; 1469 PyBytesObject *op; 1470 size_t nbytes; 1471 if (n < 0) 1472 n = 0; 1473 /* watch out for overflows: the size can overflow int, 1474 * and the # of bytes needed can overflow size_t 1475 */ 1476 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) { 1477 PyErr_SetString(PyExc_OverflowError, 1478 "repeated bytes are too long"); 1479 return NULL; 1480 } 1481 size = Py_SIZE(a) * n; 1482 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) { 1483 Py_INCREF(a); 1484 return (PyObject *)a; 1485 } 1486 nbytes = (size_t)size; 1487 if (nbytes + PyBytesObject_SIZE <= nbytes) { 1488 PyErr_SetString(PyExc_OverflowError, 1489 "repeated bytes are too long"); 1490 return NULL; 1491 } 1492 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes); 1493 if (op == NULL) 1494 return PyErr_NoMemory(); 1495 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size); 1496 op->ob_shash = -1; 1497 op->ob_sval[size] = '\0'; 1498 if (Py_SIZE(a) == 1 && n > 0) { 1499 memset(op->ob_sval, a->ob_sval[0] , n); 1500 return (PyObject *) op; 1501 } 1502 i = 0; 1503 if (i < size) { 1504 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1505 i = Py_SIZE(a); 1506 } 1507 while (i < size) { 1508 j = (i <= size-i) ? i : size-i; 1509 memcpy(op->ob_sval+i, op->ob_sval, j); 1510 i += j; 1511 } 1512 return (PyObject *) op; 1513 } 1514 1515 static int 1516 bytes_contains(PyObject *self, PyObject *arg) 1517 { 1518 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg); 1519 } 1520 1521 static PyObject * 1522 bytes_item(PyBytesObject *a, Py_ssize_t i) 1523 { 1524 if (i < 0 || i >= Py_SIZE(a)) { 1525 PyErr_SetString(PyExc_IndexError, "index out of range"); 1526 return NULL; 1527 } 1528 return PyLong_FromLong((unsigned char)a->ob_sval[i]); 1529 } 1530 1531 static int 1532 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b) 1533 { 1534 int cmp; 1535 Py_ssize_t len; 1536 1537 len = Py_SIZE(a); 1538 if (Py_SIZE(b) != len) 1539 return 0; 1540 1541 if (a->ob_sval[0] != b->ob_sval[0]) 1542 return 0; 1543 1544 cmp = memcmp(a->ob_sval, b->ob_sval, len); 1545 return (cmp == 0); 1546 } 1547 1548 static PyObject* 1549 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) 1550 { 1551 int c; 1552 Py_ssize_t len_a, len_b; 1553 Py_ssize_t min_len; 1554 PyObject *result; 1555 int rc; 1556 1557 /* Make sure both arguments are strings. */ 1558 if (!(PyBytes_Check(a) && PyBytes_Check(b))) { 1559 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) { 1560 rc = PyObject_IsInstance((PyObject*)a, 1561 (PyObject*)&PyUnicode_Type); 1562 if (!rc) 1563 rc = PyObject_IsInstance((PyObject*)b, 1564 (PyObject*)&PyUnicode_Type); 1565 if (rc < 0) 1566 return NULL; 1567 if (rc) { 1568 if (PyErr_WarnEx(PyExc_BytesWarning, 1569 "Comparison between bytes and string", 1)) 1570 return NULL; 1571 } 1572 else { 1573 rc = PyObject_IsInstance((PyObject*)a, 1574 (PyObject*)&PyLong_Type); 1575 if (!rc) 1576 rc = PyObject_IsInstance((PyObject*)b, 1577 (PyObject*)&PyLong_Type); 1578 if (rc < 0) 1579 return NULL; 1580 if (rc) { 1581 if (PyErr_WarnEx(PyExc_BytesWarning, 1582 "Comparison between bytes and int", 1)) 1583 return NULL; 1584 } 1585 } 1586 } 1587 result = Py_NotImplemented; 1588 } 1589 else if (a == b) { 1590 switch (op) { 1591 case Py_EQ: 1592 case Py_LE: 1593 case Py_GE: 1594 /* a string is equal to itself */ 1595 result = Py_True; 1596 break; 1597 case Py_NE: 1598 case Py_LT: 1599 case Py_GT: 1600 result = Py_False; 1601 break; 1602 default: 1603 PyErr_BadArgument(); 1604 return NULL; 1605 } 1606 } 1607 else if (op == Py_EQ || op == Py_NE) { 1608 int eq = bytes_compare_eq(a, b); 1609 eq ^= (op == Py_NE); 1610 result = eq ? Py_True : Py_False; 1611 } 1612 else { 1613 len_a = Py_SIZE(a); 1614 len_b = Py_SIZE(b); 1615 min_len = Py_MIN(len_a, len_b); 1616 if (min_len > 0) { 1617 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); 1618 if (c == 0) 1619 c = memcmp(a->ob_sval, b->ob_sval, min_len); 1620 } 1621 else 1622 c = 0; 1623 if (c == 0) 1624 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; 1625 switch (op) { 1626 case Py_LT: c = c < 0; break; 1627 case Py_LE: c = c <= 0; break; 1628 case Py_GT: c = c > 0; break; 1629 case Py_GE: c = c >= 0; break; 1630 default: 1631 PyErr_BadArgument(); 1632 return NULL; 1633 } 1634 result = c ? Py_True : Py_False; 1635 } 1636 1637 Py_INCREF(result); 1638 return result; 1639 } 1640 1641 static Py_hash_t 1642 bytes_hash(PyBytesObject *a) 1643 { 1644 if (a->ob_shash == -1) { 1645 /* Can't fail */ 1646 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a)); 1647 } 1648 return a->ob_shash; 1649 } 1650 1651 static PyObject* 1652 bytes_subscript(PyBytesObject* self, PyObject* item) 1653 { 1654 if (PyIndex_Check(item)) { 1655 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1656 if (i == -1 && PyErr_Occurred()) 1657 return NULL; 1658 if (i < 0) 1659 i += PyBytes_GET_SIZE(self); 1660 if (i < 0 || i >= PyBytes_GET_SIZE(self)) { 1661 PyErr_SetString(PyExc_IndexError, 1662 "index out of range"); 1663 return NULL; 1664 } 1665 return PyLong_FromLong((unsigned char)self->ob_sval[i]); 1666 } 1667 else if (PySlice_Check(item)) { 1668 Py_ssize_t start, stop, step, slicelength, cur, i; 1669 char* source_buf; 1670 char* result_buf; 1671 PyObject* result; 1672 1673 if (PySlice_GetIndicesEx(item, 1674 PyBytes_GET_SIZE(self), 1675 &start, &stop, &step, &slicelength) < 0) { 1676 return NULL; 1677 } 1678 1679 if (slicelength <= 0) { 1680 return PyBytes_FromStringAndSize("", 0); 1681 } 1682 else if (start == 0 && step == 1 && 1683 slicelength == PyBytes_GET_SIZE(self) && 1684 PyBytes_CheckExact(self)) { 1685 Py_INCREF(self); 1686 return (PyObject *)self; 1687 } 1688 else if (step == 1) { 1689 return PyBytes_FromStringAndSize( 1690 PyBytes_AS_STRING(self) + start, 1691 slicelength); 1692 } 1693 else { 1694 source_buf = PyBytes_AS_STRING(self); 1695 result = PyBytes_FromStringAndSize(NULL, slicelength); 1696 if (result == NULL) 1697 return NULL; 1698 1699 result_buf = PyBytes_AS_STRING(result); 1700 for (cur = start, i = 0; i < slicelength; 1701 cur += step, i++) { 1702 result_buf[i] = source_buf[cur]; 1703 } 1704 1705 return result; 1706 } 1707 } 1708 else { 1709 PyErr_Format(PyExc_TypeError, 1710 "byte indices must be integers or slices, not %.200s", 1711 Py_TYPE(item)->tp_name); 1712 return NULL; 1713 } 1714 } 1715 1716 static int 1717 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags) 1718 { 1719 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self), 1720 1, flags); 1721 } 1722 1723 static PySequenceMethods bytes_as_sequence = { 1724 (lenfunc)bytes_length, /*sq_length*/ 1725 (binaryfunc)bytes_concat, /*sq_concat*/ 1726 (ssizeargfunc)bytes_repeat, /*sq_repeat*/ 1727 (ssizeargfunc)bytes_item, /*sq_item*/ 1728 0, /*sq_slice*/ 1729 0, /*sq_ass_item*/ 1730 0, /*sq_ass_slice*/ 1731 (objobjproc)bytes_contains /*sq_contains*/ 1732 }; 1733 1734 static PyMappingMethods bytes_as_mapping = { 1735 (lenfunc)bytes_length, 1736 (binaryfunc)bytes_subscript, 1737 0, 1738 }; 1739 1740 static PyBufferProcs bytes_as_buffer = { 1741 (getbufferproc)bytes_buffer_getbuffer, 1742 NULL, 1743 }; 1744 1745 1746 #define LEFTSTRIP 0 1747 #define RIGHTSTRIP 1 1748 #define BOTHSTRIP 2 1749 1750 /*[clinic input] 1751 bytes.split 1752 1753 sep: object = None 1754 The delimiter according which to split the bytes. 1755 None (the default value) means split on ASCII whitespace characters 1756 (space, tab, return, newline, formfeed, vertical tab). 1757 maxsplit: Py_ssize_t = -1 1758 Maximum number of splits to do. 1759 -1 (the default value) means no limit. 1760 1761 Return a list of the sections in the bytes, using sep as the delimiter. 1762 [clinic start generated code]*/ 1763 1764 static PyObject * 1765 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit) 1766 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/ 1767 { 1768 Py_ssize_t len = PyBytes_GET_SIZE(self), n; 1769 const char *s = PyBytes_AS_STRING(self), *sub; 1770 Py_buffer vsub; 1771 PyObject *list; 1772 1773 if (maxsplit < 0) 1774 maxsplit = PY_SSIZE_T_MAX; 1775 if (sep == Py_None) 1776 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); 1777 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0) 1778 return NULL; 1779 sub = vsub.buf; 1780 n = vsub.len; 1781 1782 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit); 1783 PyBuffer_Release(&vsub); 1784 return list; 1785 } 1786 1787 /*[clinic input] 1788 bytes.partition 1789 1790 sep: Py_buffer 1791 / 1792 1793 Partition the bytes into three parts using the given separator. 1794 1795 This will search for the separator sep in the bytes. If the separator is found, 1796 returns a 3-tuple containing the part before the separator, the separator 1797 itself, and the part after it. 1798 1799 If the separator is not found, returns a 3-tuple containing the original bytes 1800 object and two empty bytes objects. 1801 [clinic start generated code]*/ 1802 1803 static PyObject * 1804 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep) 1805 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/ 1806 { 1807 return stringlib_partition( 1808 (PyObject*) self, 1809 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), 1810 sep->obj, (const char *)sep->buf, sep->len 1811 ); 1812 } 1813 1814 /*[clinic input] 1815 bytes.rpartition 1816 1817 sep: Py_buffer 1818 / 1819 1820 Partition the bytes into three parts using the given separator. 1821 1822 This will search for the separator sep in the bytes, starting and the end. If 1823 the separator is found, returns a 3-tuple containing the part before the 1824 separator, the separator itself, and the part after it. 1825 1826 If the separator is not found, returns a 3-tuple containing two empty bytes 1827 objects and the original bytes object. 1828 [clinic start generated code]*/ 1829 1830 static PyObject * 1831 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep) 1832 /*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/ 1833 { 1834 return stringlib_rpartition( 1835 (PyObject*) self, 1836 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), 1837 sep->obj, (const char *)sep->buf, sep->len 1838 ); 1839 } 1840 1841 /*[clinic input] 1842 bytes.rsplit = bytes.split 1843 1844 Return a list of the sections in the bytes, using sep as the delimiter. 1845 1846 Splitting is done starting at the end of the bytes and working to the front. 1847 [clinic start generated code]*/ 1848 1849 static PyObject * 1850 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit) 1851 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/ 1852 { 1853 Py_ssize_t len = PyBytes_GET_SIZE(self), n; 1854 const char *s = PyBytes_AS_STRING(self), *sub; 1855 Py_buffer vsub; 1856 PyObject *list; 1857 1858 if (maxsplit < 0) 1859 maxsplit = PY_SSIZE_T_MAX; 1860 if (sep == Py_None) 1861 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); 1862 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0) 1863 return NULL; 1864 sub = vsub.buf; 1865 n = vsub.len; 1866 1867 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit); 1868 PyBuffer_Release(&vsub); 1869 return list; 1870 } 1871 1872 1873 /*[clinic input] 1874 bytes.join 1875 1876 iterable_of_bytes: object 1877 / 1878 1879 Concatenate any number of bytes objects. 1880 1881 The bytes whose method is called is inserted in between each pair. 1882 1883 The result is returned as a new bytes object. 1884 1885 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'. 1886 [clinic start generated code]*/ 1887 1888 static PyObject * 1889 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes) 1890 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/ 1891 { 1892 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes); 1893 } 1894 1895 PyObject * 1896 _PyBytes_Join(PyObject *sep, PyObject *x) 1897 { 1898 assert(sep != NULL && PyBytes_Check(sep)); 1899 assert(x != NULL); 1900 return bytes_join((PyBytesObject*)sep, x); 1901 } 1902 1903 static PyObject * 1904 bytes_find(PyBytesObject *self, PyObject *args) 1905 { 1906 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args); 1907 } 1908 1909 static PyObject * 1910 bytes_index(PyBytesObject *self, PyObject *args) 1911 { 1912 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args); 1913 } 1914 1915 1916 static PyObject * 1917 bytes_rfind(PyBytesObject *self, PyObject *args) 1918 { 1919 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args); 1920 } 1921 1922 1923 static PyObject * 1924 bytes_rindex(PyBytesObject *self, PyObject *args) 1925 { 1926 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args); 1927 } 1928 1929 1930 Py_LOCAL_INLINE(PyObject *) 1931 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj) 1932 { 1933 Py_buffer vsep; 1934 char *s = PyBytes_AS_STRING(self); 1935 Py_ssize_t len = PyBytes_GET_SIZE(self); 1936 char *sep; 1937 Py_ssize_t seplen; 1938 Py_ssize_t i, j; 1939 1940 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0) 1941 return NULL; 1942 sep = vsep.buf; 1943 seplen = vsep.len; 1944 1945 i = 0; 1946 if (striptype != RIGHTSTRIP) { 1947 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { 1948 i++; 1949 } 1950 } 1951 1952 j = len; 1953 if (striptype != LEFTSTRIP) { 1954 do { 1955 j--; 1956 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen)); 1957 j++; 1958 } 1959 1960 PyBuffer_Release(&vsep); 1961 1962 if (i == 0 && j == len && PyBytes_CheckExact(self)) { 1963 Py_INCREF(self); 1964 return (PyObject*)self; 1965 } 1966 else 1967 return PyBytes_FromStringAndSize(s+i, j-i); 1968 } 1969 1970 1971 Py_LOCAL_INLINE(PyObject *) 1972 do_strip(PyBytesObject *self, int striptype) 1973 { 1974 char *s = PyBytes_AS_STRING(self); 1975 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j; 1976 1977 i = 0; 1978 if (striptype != RIGHTSTRIP) { 1979 while (i < len && Py_ISSPACE(s[i])) { 1980 i++; 1981 } 1982 } 1983 1984 j = len; 1985 if (striptype != LEFTSTRIP) { 1986 do { 1987 j--; 1988 } while (j >= i && Py_ISSPACE(s[j])); 1989 j++; 1990 } 1991 1992 if (i == 0 && j == len && PyBytes_CheckExact(self)) { 1993 Py_INCREF(self); 1994 return (PyObject*)self; 1995 } 1996 else 1997 return PyBytes_FromStringAndSize(s+i, j-i); 1998 } 1999 2000 2001 Py_LOCAL_INLINE(PyObject *) 2002 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes) 2003 { 2004 if (bytes != NULL && bytes != Py_None) { 2005 return do_xstrip(self, striptype, bytes); 2006 } 2007 return do_strip(self, striptype); 2008 } 2009 2010 /*[clinic input] 2011 bytes.strip 2012 2013 bytes: object = None 2014 / 2015 2016 Strip leading and trailing bytes contained in the argument. 2017 2018 If the argument is omitted or None, strip leading and trailing ASCII whitespace. 2019 [clinic start generated code]*/ 2020 2021 static PyObject * 2022 bytes_strip_impl(PyBytesObject *self, PyObject *bytes) 2023 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/ 2024 { 2025 return do_argstrip(self, BOTHSTRIP, bytes); 2026 } 2027 2028 /*[clinic input] 2029 bytes.lstrip 2030 2031 bytes: object = None 2032 / 2033 2034 Strip leading bytes contained in the argument. 2035 2036 If the argument is omitted or None, strip leading ASCII whitespace. 2037 [clinic start generated code]*/ 2038 2039 static PyObject * 2040 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes) 2041 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/ 2042 { 2043 return do_argstrip(self, LEFTSTRIP, bytes); 2044 } 2045 2046 /*[clinic input] 2047 bytes.rstrip 2048 2049 bytes: object = None 2050 / 2051 2052 Strip trailing bytes contained in the argument. 2053 2054 If the argument is omitted or None, strip trailing ASCII whitespace. 2055 [clinic start generated code]*/ 2056 2057 static PyObject * 2058 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes) 2059 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/ 2060 { 2061 return do_argstrip(self, RIGHTSTRIP, bytes); 2062 } 2063 2064 2065 static PyObject * 2066 bytes_count(PyBytesObject *self, PyObject *args) 2067 { 2068 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args); 2069 } 2070 2071 2072 /*[clinic input] 2073 bytes.translate 2074 2075 table: object 2076 Translation table, which must be a bytes object of length 256. 2077 / 2078 delete as deletechars: object(c_default="NULL") = b'' 2079 2080 Return a copy with each character mapped by the given translation table. 2081 2082 All characters occurring in the optional argument delete are removed. 2083 The remaining characters are mapped through the given translation table. 2084 [clinic start generated code]*/ 2085 2086 static PyObject * 2087 bytes_translate_impl(PyBytesObject *self, PyObject *table, 2088 PyObject *deletechars) 2089 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/ 2090 { 2091 char *input, *output; 2092 Py_buffer table_view = {NULL, NULL}; 2093 Py_buffer del_table_view = {NULL, NULL}; 2094 const char *table_chars; 2095 Py_ssize_t i, c, changed = 0; 2096 PyObject *input_obj = (PyObject*)self; 2097 const char *output_start, *del_table_chars=NULL; 2098 Py_ssize_t inlen, tablen, dellen = 0; 2099 PyObject *result; 2100 int trans_table[256]; 2101 2102 if (PyBytes_Check(table)) { 2103 table_chars = PyBytes_AS_STRING(table); 2104 tablen = PyBytes_GET_SIZE(table); 2105 } 2106 else if (table == Py_None) { 2107 table_chars = NULL; 2108 tablen = 256; 2109 } 2110 else { 2111 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0) 2112 return NULL; 2113 table_chars = table_view.buf; 2114 tablen = table_view.len; 2115 } 2116 2117 if (tablen != 256) { 2118 PyErr_SetString(PyExc_ValueError, 2119 "translation table must be 256 characters long"); 2120 PyBuffer_Release(&table_view); 2121 return NULL; 2122 } 2123 2124 if (deletechars != NULL) { 2125 if (PyBytes_Check(deletechars)) { 2126 del_table_chars = PyBytes_AS_STRING(deletechars); 2127 dellen = PyBytes_GET_SIZE(deletechars); 2128 } 2129 else { 2130 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) { 2131 PyBuffer_Release(&table_view); 2132 return NULL; 2133 } 2134 del_table_chars = del_table_view.buf; 2135 dellen = del_table_view.len; 2136 } 2137 } 2138 else { 2139 del_table_chars = NULL; 2140 dellen = 0; 2141 } 2142 2143 inlen = PyBytes_GET_SIZE(input_obj); 2144 result = PyBytes_FromStringAndSize((char *)NULL, inlen); 2145 if (result == NULL) { 2146 PyBuffer_Release(&del_table_view); 2147 PyBuffer_Release(&table_view); 2148 return NULL; 2149 } 2150 output_start = output = PyBytes_AS_STRING(result); 2151 input = PyBytes_AS_STRING(input_obj); 2152 2153 if (dellen == 0 && table_chars != NULL) { 2154 /* If no deletions are required, use faster code */ 2155 for (i = inlen; --i >= 0; ) { 2156 c = Py_CHARMASK(*input++); 2157 if (Py_CHARMASK((*output++ = table_chars[c])) != c) 2158 changed = 1; 2159 } 2160 if (!changed && PyBytes_CheckExact(input_obj)) { 2161 Py_INCREF(input_obj); 2162 Py_DECREF(result); 2163 result = input_obj; 2164 } 2165 PyBuffer_Release(&del_table_view); 2166 PyBuffer_Release(&table_view); 2167 return result; 2168 } 2169 2170 if (table_chars == NULL) { 2171 for (i = 0; i < 256; i++) 2172 trans_table[i] = Py_CHARMASK(i); 2173 } else { 2174 for (i = 0; i < 256; i++) 2175 trans_table[i] = Py_CHARMASK(table_chars[i]); 2176 } 2177 PyBuffer_Release(&table_view); 2178 2179 for (i = 0; i < dellen; i++) 2180 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1; 2181 PyBuffer_Release(&del_table_view); 2182 2183 for (i = inlen; --i >= 0; ) { 2184 c = Py_CHARMASK(*input++); 2185 if (trans_table[c] != -1) 2186 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) 2187 continue; 2188 changed = 1; 2189 } 2190 if (!changed && PyBytes_CheckExact(input_obj)) { 2191 Py_DECREF(result); 2192 Py_INCREF(input_obj); 2193 return input_obj; 2194 } 2195 /* Fix the size of the resulting string */ 2196 if (inlen > 0) 2197 _PyBytes_Resize(&result, output - output_start); 2198 return result; 2199 } 2200 2201 2202 /*[clinic input] 2203 2204 @staticmethod 2205 bytes.maketrans 2206 2207 frm: Py_buffer 2208 to: Py_buffer 2209 / 2210 2211 Return a translation table useable for the bytes or bytearray translate method. 2212 2213 The returned table will be one where each byte in frm is mapped to the byte at 2214 the same position in to. 2215 2216 The bytes objects frm and to must be of the same length. 2217 [clinic start generated code]*/ 2218 2219 static PyObject * 2220 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to) 2221 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/ 2222 { 2223 return _Py_bytes_maketrans(frm, to); 2224 } 2225 2226 2227 /*[clinic input] 2228 bytes.replace 2229 2230 old: Py_buffer 2231 new: Py_buffer 2232 count: Py_ssize_t = -1 2233 Maximum number of occurrences to replace. 2234 -1 (the default value) means replace all occurrences. 2235 / 2236 2237 Return a copy with all occurrences of substring old replaced by new. 2238 2239 If the optional argument count is given, only the first count occurrences are 2240 replaced. 2241 [clinic start generated code]*/ 2242 2243 static PyObject * 2244 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new, 2245 Py_ssize_t count) 2246 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/ 2247 { 2248 return stringlib_replace((PyObject *)self, 2249 (const char *)old->buf, old->len, 2250 (const char *)new->buf, new->len, count); 2251 } 2252 2253 /** End DALKE **/ 2254 2255 2256 static PyObject * 2257 bytes_startswith(PyBytesObject *self, PyObject *args) 2258 { 2259 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args); 2260 } 2261 2262 static PyObject * 2263 bytes_endswith(PyBytesObject *self, PyObject *args) 2264 { 2265 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args); 2266 } 2267 2268 2269 /*[clinic input] 2270 bytes.decode 2271 2272 encoding: str(c_default="NULL") = 'utf-8' 2273 The encoding with which to decode the bytes. 2274 errors: str(c_default="NULL") = 'strict' 2275 The error handling scheme to use for the handling of decoding errors. 2276 The default is 'strict' meaning that decoding errors raise a 2277 UnicodeDecodeError. Other possible values are 'ignore' and 'replace' 2278 as well as any other name registered with codecs.register_error that 2279 can handle UnicodeDecodeErrors. 2280 2281 Decode the bytes using the codec registered for encoding. 2282 [clinic start generated code]*/ 2283 2284 static PyObject * 2285 bytes_decode_impl(PyBytesObject *self, const char *encoding, 2286 const char *errors) 2287 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/ 2288 { 2289 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors); 2290 } 2291 2292 2293 /*[clinic input] 2294 bytes.splitlines 2295 2296 keepends: int(c_default="0") = False 2297 2298 Return a list of the lines in the bytes, breaking at line boundaries. 2299 2300 Line breaks are not included in the resulting list unless keepends is given and 2301 true. 2302 [clinic start generated code]*/ 2303 2304 static PyObject * 2305 bytes_splitlines_impl(PyBytesObject *self, int keepends) 2306 /*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/ 2307 { 2308 return stringlib_splitlines( 2309 (PyObject*) self, PyBytes_AS_STRING(self), 2310 PyBytes_GET_SIZE(self), keepends 2311 ); 2312 } 2313 2314 /*[clinic input] 2315 @classmethod 2316 bytes.fromhex 2317 2318 string: unicode 2319 / 2320 2321 Create a bytes object from a string of hexadecimal numbers. 2322 2323 Spaces between two numbers are accepted. 2324 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'. 2325 [clinic start generated code]*/ 2326 2327 static PyObject * 2328 bytes_fromhex_impl(PyTypeObject *type, PyObject *string) 2329 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/ 2330 { 2331 PyObject *result = _PyBytes_FromHex(string, 0); 2332 if (type != &PyBytes_Type && result != NULL) { 2333 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type, 2334 result, NULL)); 2335 } 2336 return result; 2337 } 2338 2339 PyObject* 2340 _PyBytes_FromHex(PyObject *string, int use_bytearray) 2341 { 2342 char *buf; 2343 Py_ssize_t hexlen, invalid_char; 2344 unsigned int top, bot; 2345 Py_UCS1 *str, *end; 2346 _PyBytesWriter writer; 2347 2348 _PyBytesWriter_Init(&writer); 2349 writer.use_bytearray = use_bytearray; 2350 2351 assert(PyUnicode_Check(string)); 2352 if (PyUnicode_READY(string)) 2353 return NULL; 2354 hexlen = PyUnicode_GET_LENGTH(string); 2355 2356 if (!PyUnicode_IS_ASCII(string)) { 2357 void *data = PyUnicode_DATA(string); 2358 unsigned int kind = PyUnicode_KIND(string); 2359 Py_ssize_t i; 2360 2361 /* search for the first non-ASCII character */ 2362 for (i = 0; i < hexlen; i++) { 2363 if (PyUnicode_READ(kind, data, i) >= 128) 2364 break; 2365 } 2366 invalid_char = i; 2367 goto error; 2368 } 2369 2370 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); 2371 str = PyUnicode_1BYTE_DATA(string); 2372 2373 /* This overestimates if there are spaces */ 2374 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); 2375 if (buf == NULL) 2376 return NULL; 2377 2378 end = str + hexlen; 2379 while (str < end) { 2380 /* skip over spaces in the input */ 2381 if (*str == ' ') { 2382 do { 2383 str++; 2384 } while (*str == ' '); 2385 if (str >= end) 2386 break; 2387 } 2388 2389 top = _PyLong_DigitValue[*str]; 2390 if (top >= 16) { 2391 invalid_char = str - PyUnicode_1BYTE_DATA(string); 2392 goto error; 2393 } 2394 str++; 2395 2396 bot = _PyLong_DigitValue[*str]; 2397 if (bot >= 16) { 2398 invalid_char = str - PyUnicode_1BYTE_DATA(string); 2399 goto error; 2400 } 2401 str++; 2402 2403 *buf++ = (unsigned char)((top << 4) + bot); 2404 } 2405 2406 return _PyBytesWriter_Finish(&writer, buf); 2407 2408 error: 2409 PyErr_Format(PyExc_ValueError, 2410 "non-hexadecimal number found in " 2411 "fromhex() arg at position %zd", invalid_char); 2412 _PyBytesWriter_Dealloc(&writer); 2413 return NULL; 2414 } 2415 2416 PyDoc_STRVAR(hex__doc__, 2417 "B.hex() -> string\n\ 2418 \n\ 2419 Create a string of hexadecimal numbers from a bytes object.\n\ 2420 Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'."); 2421 2422 static PyObject * 2423 bytes_hex(PyBytesObject *self) 2424 { 2425 char* argbuf = PyBytes_AS_STRING(self); 2426 Py_ssize_t arglen = PyBytes_GET_SIZE(self); 2427 return _Py_strhex(argbuf, arglen); 2428 } 2429 2430 static PyObject * 2431 bytes_getnewargs(PyBytesObject *v) 2432 { 2433 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v)); 2434 } 2435 2436 2437 static PyMethodDef 2438 bytes_methods[] = { 2439 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS}, 2440 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS, 2441 _Py_capitalize__doc__}, 2442 {"center", (PyCFunction)stringlib_center, METH_VARARGS, 2443 _Py_center__doc__}, 2444 {"count", (PyCFunction)bytes_count, METH_VARARGS, 2445 _Py_count__doc__}, 2446 BYTES_DECODE_METHODDEF 2447 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, 2448 _Py_endswith__doc__}, 2449 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS, 2450 _Py_expandtabs__doc__}, 2451 {"find", (PyCFunction)bytes_find, METH_VARARGS, 2452 _Py_find__doc__}, 2453 BYTES_FROMHEX_METHODDEF 2454 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__}, 2455 {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__}, 2456 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, 2457 _Py_isalnum__doc__}, 2458 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS, 2459 _Py_isalpha__doc__}, 2460 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS, 2461 _Py_isdigit__doc__}, 2462 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS, 2463 _Py_islower__doc__}, 2464 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS, 2465 _Py_isspace__doc__}, 2466 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS, 2467 _Py_istitle__doc__}, 2468 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS, 2469 _Py_isupper__doc__}, 2470 BYTES_JOIN_METHODDEF 2471 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__}, 2472 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, 2473 BYTES_LSTRIP_METHODDEF 2474 BYTES_MAKETRANS_METHODDEF 2475 BYTES_PARTITION_METHODDEF 2476 BYTES_REPLACE_METHODDEF 2477 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__}, 2478 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__}, 2479 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__}, 2480 BYTES_RPARTITION_METHODDEF 2481 BYTES_RSPLIT_METHODDEF 2482 BYTES_RSTRIP_METHODDEF 2483 BYTES_SPLIT_METHODDEF 2484 BYTES_SPLITLINES_METHODDEF 2485 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, 2486 _Py_startswith__doc__}, 2487 BYTES_STRIP_METHODDEF 2488 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, 2489 _Py_swapcase__doc__}, 2490 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, 2491 BYTES_TRANSLATE_METHODDEF 2492 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, 2493 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__}, 2494 {NULL, NULL} /* sentinel */ 2495 }; 2496 2497 static PyObject * 2498 bytes_mod(PyObject *self, PyObject *arg) 2499 { 2500 if (!PyBytes_Check(self)) { 2501 Py_RETURN_NOTIMPLEMENTED; 2502 } 2503 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), 2504 arg, 0); 2505 } 2506 2507 static PyNumberMethods bytes_as_number = { 2508 0, /*nb_add*/ 2509 0, /*nb_subtract*/ 2510 0, /*nb_multiply*/ 2511 bytes_mod, /*nb_remainder*/ 2512 }; 2513 2514 static PyObject * 2515 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); 2516 2517 static PyObject * 2518 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 2519 { 2520 PyObject *x = NULL; 2521 const char *encoding = NULL; 2522 const char *errors = NULL; 2523 PyObject *new = NULL; 2524 PyObject *func; 2525 Py_ssize_t size; 2526 static char *kwlist[] = {"source", "encoding", "errors", 0}; 2527 _Py_IDENTIFIER(__bytes__); 2528 2529 if (type != &PyBytes_Type) 2530 return bytes_subtype_new(type, args, kwds); 2531 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x, 2532 &encoding, &errors)) 2533 return NULL; 2534 if (x == NULL) { 2535 if (encoding != NULL || errors != NULL) { 2536 PyErr_SetString(PyExc_TypeError, 2537 "encoding or errors without sequence " 2538 "argument"); 2539 return NULL; 2540 } 2541 return PyBytes_FromStringAndSize(NULL, 0); 2542 } 2543 2544 if (encoding != NULL) { 2545 /* Encode via the codec registry */ 2546 if (!PyUnicode_Check(x)) { 2547 PyErr_SetString(PyExc_TypeError, 2548 "encoding without a string argument"); 2549 return NULL; 2550 } 2551 new = PyUnicode_AsEncodedString(x, encoding, errors); 2552 if (new == NULL) 2553 return NULL; 2554 assert(PyBytes_Check(new)); 2555 return new; 2556 } 2557 2558 if (errors != NULL) { 2559 PyErr_SetString(PyExc_TypeError, 2560 PyUnicode_Check(x) ? 2561 "string argument without an encoding" : 2562 "errors without a string argument"); 2563 return NULL; 2564 } 2565 2566 /* We'd like to call PyObject_Bytes here, but we need to check for an 2567 integer argument before deferring to PyBytes_FromObject, something 2568 PyObject_Bytes doesn't do. */ 2569 func = _PyObject_LookupSpecial(x, &PyId___bytes__); 2570 if (func != NULL) { 2571 new = PyObject_CallFunctionObjArgs(func, NULL); 2572 Py_DECREF(func); 2573 if (new == NULL) 2574 return NULL; 2575 if (!PyBytes_Check(new)) { 2576 PyErr_Format(PyExc_TypeError, 2577 "__bytes__ returned non-bytes (type %.200s)", 2578 Py_TYPE(new)->tp_name); 2579 Py_DECREF(new); 2580 return NULL; 2581 } 2582 return new; 2583 } 2584 else if (PyErr_Occurred()) 2585 return NULL; 2586 2587 if (PyUnicode_Check(x)) { 2588 PyErr_SetString(PyExc_TypeError, 2589 "string argument without an encoding"); 2590 return NULL; 2591 } 2592 /* Is it an integer? */ 2593 if (PyIndex_Check(x)) { 2594 size = PyNumber_AsSsize_t(x, PyExc_OverflowError); 2595 if (size == -1 && PyErr_Occurred()) { 2596 if (PyErr_ExceptionMatches(PyExc_OverflowError)) 2597 return NULL; 2598 PyErr_Clear(); /* fall through */ 2599 } 2600 else { 2601 if (size < 0) { 2602 PyErr_SetString(PyExc_ValueError, "negative count"); 2603 return NULL; 2604 } 2605 new = _PyBytes_FromSize(size, 1); 2606 if (new == NULL) 2607 return NULL; 2608 return new; 2609 } 2610 } 2611 2612 return PyBytes_FromObject(x); 2613 } 2614 2615 static PyObject* 2616 _PyBytes_FromBuffer(PyObject *x) 2617 { 2618 PyObject *new; 2619 Py_buffer view; 2620 2621 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0) 2622 return NULL; 2623 2624 new = PyBytes_FromStringAndSize(NULL, view.len); 2625 if (!new) 2626 goto fail; 2627 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval, 2628 &view, view.len, 'C') < 0) 2629 goto fail; 2630 PyBuffer_Release(&view); 2631 return new; 2632 2633 fail: 2634 Py_XDECREF(new); 2635 PyBuffer_Release(&view); 2636 return NULL; 2637 } 2638 2639 #define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \ 2640 do { \ 2641 PyObject *bytes; \ 2642 Py_ssize_t i; \ 2643 Py_ssize_t value; \ 2644 char *str; \ 2645 PyObject *item; \ 2646 \ 2647 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \ 2648 if (bytes == NULL) \ 2649 return NULL; \ 2650 str = ((PyBytesObject *)bytes)->ob_sval; \ 2651 \ 2652 for (i = 0; i < Py_SIZE(x); i++) { \ 2653 item = GET_ITEM((x), i); \ 2654 value = PyNumber_AsSsize_t(item, NULL); \ 2655 if (value == -1 && PyErr_Occurred()) \ 2656 goto error; \ 2657 \ 2658 if (value < 0 || value >= 256) { \ 2659 PyErr_SetString(PyExc_ValueError, \ 2660 "bytes must be in range(0, 256)"); \ 2661 goto error; \ 2662 } \ 2663 *str++ = (char) value; \ 2664 } \ 2665 return bytes; \ 2666 \ 2667 error: \ 2668 Py_DECREF(bytes); \ 2669 return NULL; \ 2670 } while (0) 2671 2672 static PyObject* 2673 _PyBytes_FromList(PyObject *x) 2674 { 2675 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM); 2676 } 2677 2678 static PyObject* 2679 _PyBytes_FromTuple(PyObject *x) 2680 { 2681 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM); 2682 } 2683 2684 static PyObject * 2685 _PyBytes_FromIterator(PyObject *it, PyObject *x) 2686 { 2687 char *str; 2688 Py_ssize_t i, size; 2689 _PyBytesWriter writer; 2690 2691 /* For iterator version, create a string object and resize as needed */ 2692 size = PyObject_LengthHint(x, 64); 2693 if (size == -1 && PyErr_Occurred()) 2694 return NULL; 2695 2696 _PyBytesWriter_Init(&writer); 2697 str = _PyBytesWriter_Alloc(&writer, size); 2698 if (str == NULL) 2699 return NULL; 2700 writer.overallocate = 1; 2701 size = writer.allocated; 2702 2703 /* Run the iterator to exhaustion */ 2704 for (i = 0; ; i++) { 2705 PyObject *item; 2706 Py_ssize_t value; 2707 2708 /* Get the next item */ 2709 item = PyIter_Next(it); 2710 if (item == NULL) { 2711 if (PyErr_Occurred()) 2712 goto error; 2713 break; 2714 } 2715 2716 /* Interpret it as an int (__index__) */ 2717 value = PyNumber_AsSsize_t(item, NULL); 2718 Py_DECREF(item); 2719 if (value == -1 && PyErr_Occurred()) 2720 goto error; 2721 2722 /* Range check */ 2723 if (value < 0 || value >= 256) { 2724 PyErr_SetString(PyExc_ValueError, 2725 "bytes must be in range(0, 256)"); 2726 goto error; 2727 } 2728 2729 /* Append the byte */ 2730 if (i >= size) { 2731 str = _PyBytesWriter_Resize(&writer, str, size+1); 2732 if (str == NULL) 2733 return NULL; 2734 size = writer.allocated; 2735 } 2736 *str++ = (char) value; 2737 } 2738 2739 return _PyBytesWriter_Finish(&writer, str); 2740 2741 error: 2742 _PyBytesWriter_Dealloc(&writer); 2743 return NULL; 2744 } 2745 2746 PyObject * 2747 PyBytes_FromObject(PyObject *x) 2748 { 2749 PyObject *it, *result; 2750 2751 if (x == NULL) { 2752 PyErr_BadInternalCall(); 2753 return NULL; 2754 } 2755 2756 if (PyBytes_CheckExact(x)) { 2757 Py_INCREF(x); 2758 return x; 2759 } 2760 2761 /* Use the modern buffer interface */ 2762 if (PyObject_CheckBuffer(x)) 2763 return _PyBytes_FromBuffer(x); 2764 2765 if (PyList_CheckExact(x)) 2766 return _PyBytes_FromList(x); 2767 2768 if (PyTuple_CheckExact(x)) 2769 return _PyBytes_FromTuple(x); 2770 2771 if (!PyUnicode_Check(x)) { 2772 it = PyObject_GetIter(x); 2773 if (it != NULL) { 2774 result = _PyBytes_FromIterator(it, x); 2775 Py_DECREF(it); 2776 return result; 2777 } 2778 } 2779 2780 PyErr_Format(PyExc_TypeError, 2781 "cannot convert '%.200s' object to bytes", 2782 x->ob_type->tp_name); 2783 return NULL; 2784 } 2785 2786 static PyObject * 2787 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 2788 { 2789 PyObject *tmp, *pnew; 2790 Py_ssize_t n; 2791 2792 assert(PyType_IsSubtype(type, &PyBytes_Type)); 2793 tmp = bytes_new(&PyBytes_Type, args, kwds); 2794 if (tmp == NULL) 2795 return NULL; 2796 assert(PyBytes_Check(tmp)); 2797 n = PyBytes_GET_SIZE(tmp); 2798 pnew = type->tp_alloc(type, n); 2799 if (pnew != NULL) { 2800 memcpy(PyBytes_AS_STRING(pnew), 2801 PyBytes_AS_STRING(tmp), n+1); 2802 ((PyBytesObject *)pnew)->ob_shash = 2803 ((PyBytesObject *)tmp)->ob_shash; 2804 } 2805 Py_DECREF(tmp); 2806 return pnew; 2807 } 2808 2809 PyDoc_STRVAR(bytes_doc, 2810 "bytes(iterable_of_ints) -> bytes\n\ 2811 bytes(string, encoding[, errors]) -> bytes\n\ 2812 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\ 2813 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\ 2814 bytes() -> empty bytes object\n\ 2815 \n\ 2816 Construct an immutable array of bytes from:\n\ 2817 - an iterable yielding integers in range(256)\n\ 2818 - a text string encoded using the specified encoding\n\ 2819 - any object implementing the buffer API.\n\ 2820 - an integer"); 2821 2822 static PyObject *bytes_iter(PyObject *seq); 2823 2824 PyTypeObject PyBytes_Type = { 2825 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2826 "bytes", 2827 PyBytesObject_SIZE, 2828 sizeof(char), 2829 bytes_dealloc, /* tp_dealloc */ 2830 0, /* tp_print */ 2831 0, /* tp_getattr */ 2832 0, /* tp_setattr */ 2833 0, /* tp_reserved */ 2834 (reprfunc)bytes_repr, /* tp_repr */ 2835 &bytes_as_number, /* tp_as_number */ 2836 &bytes_as_sequence, /* tp_as_sequence */ 2837 &bytes_as_mapping, /* tp_as_mapping */ 2838 (hashfunc)bytes_hash, /* tp_hash */ 2839 0, /* tp_call */ 2840 bytes_str, /* tp_str */ 2841 PyObject_GenericGetAttr, /* tp_getattro */ 2842 0, /* tp_setattro */ 2843 &bytes_as_buffer, /* tp_as_buffer */ 2844 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 2845 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */ 2846 bytes_doc, /* tp_doc */ 2847 0, /* tp_traverse */ 2848 0, /* tp_clear */ 2849 (richcmpfunc)bytes_richcompare, /* tp_richcompare */ 2850 0, /* tp_weaklistoffset */ 2851 bytes_iter, /* tp_iter */ 2852 0, /* tp_iternext */ 2853 bytes_methods, /* tp_methods */ 2854 0, /* tp_members */ 2855 0, /* tp_getset */ 2856 &PyBaseObject_Type, /* tp_base */ 2857 0, /* tp_dict */ 2858 0, /* tp_descr_get */ 2859 0, /* tp_descr_set */ 2860 0, /* tp_dictoffset */ 2861 0, /* tp_init */ 2862 0, /* tp_alloc */ 2863 bytes_new, /* tp_new */ 2864 PyObject_Del, /* tp_free */ 2865 }; 2866 2867 void 2868 PyBytes_Concat(PyObject **pv, PyObject *w) 2869 { 2870 assert(pv != NULL); 2871 if (*pv == NULL) 2872 return; 2873 if (w == NULL) { 2874 Py_CLEAR(*pv); 2875 return; 2876 } 2877 2878 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) { 2879 /* Only one reference, so we can resize in place */ 2880 Py_ssize_t oldsize; 2881 Py_buffer wb; 2882 2883 wb.len = -1; 2884 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) { 2885 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s", 2886 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name); 2887 Py_CLEAR(*pv); 2888 return; 2889 } 2890 2891 oldsize = PyBytes_GET_SIZE(*pv); 2892 if (oldsize > PY_SSIZE_T_MAX - wb.len) { 2893 PyErr_NoMemory(); 2894 goto error; 2895 } 2896 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0) 2897 goto error; 2898 2899 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len); 2900 PyBuffer_Release(&wb); 2901 return; 2902 2903 error: 2904 PyBuffer_Release(&wb); 2905 Py_CLEAR(*pv); 2906 return; 2907 } 2908 2909 else { 2910 /* Multiple references, need to create new object */ 2911 PyObject *v; 2912 v = bytes_concat(*pv, w); 2913 Py_SETREF(*pv, v); 2914 } 2915 } 2916 2917 void 2918 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) 2919 { 2920 PyBytes_Concat(pv, w); 2921 Py_XDECREF(w); 2922 } 2923 2924 2925 /* The following function breaks the notion that bytes are immutable: 2926 it changes the size of a bytes object. We get away with this only if there 2927 is only one module referencing the object. You can also think of it 2928 as creating a new bytes object and destroying the old one, only 2929 more efficiently. In any case, don't use this if the bytes object may 2930 already be known to some other part of the code... 2931 Note that if there's not enough memory to resize the bytes object, the 2932 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of 2933 memory" exception is set, and -1 is returned. Else (on success) 0 is 2934 returned, and the value in *pv may or may not be the same as on input. 2935 As always, an extra byte is allocated for a trailing \0 byte (newsize 2936 does *not* include that), and a trailing \0 byte is stored. 2937 */ 2938 2939 int 2940 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) 2941 { 2942 PyObject *v; 2943 PyBytesObject *sv; 2944 v = *pv; 2945 if (!PyBytes_Check(v) || newsize < 0) { 2946 goto error; 2947 } 2948 if (Py_SIZE(v) == newsize) { 2949 /* return early if newsize equals to v->ob_size */ 2950 return 0; 2951 } 2952 if (Py_REFCNT(v) != 1) { 2953 goto error; 2954 } 2955 /* XXX UNREF/NEWREF interface should be more symmetrical */ 2956 _Py_DEC_REFTOTAL; 2957 _Py_ForgetReference(v); 2958 *pv = (PyObject *) 2959 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize); 2960 if (*pv == NULL) { 2961 PyObject_Del(v); 2962 PyErr_NoMemory(); 2963 return -1; 2964 } 2965 _Py_NewReference(*pv); 2966 sv = (PyBytesObject *) *pv; 2967 Py_SIZE(sv) = newsize; 2968 sv->ob_sval[newsize] = '\0'; 2969 sv->ob_shash = -1; /* invalidate cached hash value */ 2970 return 0; 2971 error: 2972 *pv = 0; 2973 Py_DECREF(v); 2974 PyErr_BadInternalCall(); 2975 return -1; 2976 } 2977 2978 void 2979 PyBytes_Fini(void) 2980 { 2981 int i; 2982 for (i = 0; i < UCHAR_MAX + 1; i++) 2983 Py_CLEAR(characters[i]); 2984 Py_CLEAR(nullstring); 2985 } 2986 2987 /*********************** Bytes Iterator ****************************/ 2988 2989 typedef struct { 2990 PyObject_HEAD 2991 Py_ssize_t it_index; 2992 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */ 2993 } striterobject; 2994 2995 static void 2996 striter_dealloc(striterobject *it) 2997 { 2998 _PyObject_GC_UNTRACK(it); 2999 Py_XDECREF(it->it_seq); 3000 PyObject_GC_Del(it); 3001 } 3002 3003 static int 3004 striter_traverse(striterobject *it, visitproc visit, void *arg) 3005 { 3006 Py_VISIT(it->it_seq); 3007 return 0; 3008 } 3009 3010 static PyObject * 3011 striter_next(striterobject *it) 3012 { 3013 PyBytesObject *seq; 3014 PyObject *item; 3015 3016 assert(it != NULL); 3017 seq = it->it_seq; 3018 if (seq == NULL) 3019 return NULL; 3020 assert(PyBytes_Check(seq)); 3021 3022 if (it->it_index < PyBytes_GET_SIZE(seq)) { 3023 item = PyLong_FromLong( 3024 (unsigned char)seq->ob_sval[it->it_index]); 3025 if (item != NULL) 3026 ++it->it_index; 3027 return item; 3028 } 3029 3030 it->it_seq = NULL; 3031 Py_DECREF(seq); 3032 return NULL; 3033 } 3034 3035 static PyObject * 3036 striter_len(striterobject *it) 3037 { 3038 Py_ssize_t len = 0; 3039 if (it->it_seq) 3040 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index; 3041 return PyLong_FromSsize_t(len); 3042 } 3043 3044 PyDoc_STRVAR(length_hint_doc, 3045 "Private method returning an estimate of len(list(it))."); 3046 3047 static PyObject * 3048 striter_reduce(striterobject *it) 3049 { 3050 if (it->it_seq != NULL) { 3051 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"), 3052 it->it_seq, it->it_index); 3053 } else { 3054 PyObject *u = PyUnicode_FromUnicode(NULL, 0); 3055 if (u == NULL) 3056 return NULL; 3057 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u); 3058 } 3059 } 3060 3061 PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); 3062 3063 static PyObject * 3064 striter_setstate(striterobject *it, PyObject *state) 3065 { 3066 Py_ssize_t index = PyLong_AsSsize_t(state); 3067 if (index == -1 && PyErr_Occurred()) 3068 return NULL; 3069 if (it->it_seq != NULL) { 3070 if (index < 0) 3071 index = 0; 3072 else if (index > PyBytes_GET_SIZE(it->it_seq)) 3073 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */ 3074 it->it_index = index; 3075 } 3076 Py_RETURN_NONE; 3077 } 3078 3079 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling."); 3080 3081 static PyMethodDef striter_methods[] = { 3082 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS, 3083 length_hint_doc}, 3084 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS, 3085 reduce_doc}, 3086 {"__setstate__", (PyCFunction)striter_setstate, METH_O, 3087 setstate_doc}, 3088 {NULL, NULL} /* sentinel */ 3089 }; 3090 3091 PyTypeObject PyBytesIter_Type = { 3092 PyVarObject_HEAD_INIT(&PyType_Type, 0) 3093 "bytes_iterator", /* tp_name */ 3094 sizeof(striterobject), /* tp_basicsize */ 3095 0, /* tp_itemsize */ 3096 /* methods */ 3097 (destructor)striter_dealloc, /* tp_dealloc */ 3098 0, /* tp_print */ 3099 0, /* tp_getattr */ 3100 0, /* tp_setattr */ 3101 0, /* tp_reserved */ 3102 0, /* tp_repr */ 3103 0, /* tp_as_number */ 3104 0, /* tp_as_sequence */ 3105 0, /* tp_as_mapping */ 3106 0, /* tp_hash */ 3107 0, /* tp_call */ 3108 0, /* tp_str */ 3109 PyObject_GenericGetAttr, /* tp_getattro */ 3110 0, /* tp_setattro */ 3111 0, /* tp_as_buffer */ 3112 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ 3113 0, /* tp_doc */ 3114 (traverseproc)striter_traverse, /* tp_traverse */ 3115 0, /* tp_clear */ 3116 0, /* tp_richcompare */ 3117 0, /* tp_weaklistoffset */ 3118 PyObject_SelfIter, /* tp_iter */ 3119 (iternextfunc)striter_next, /* tp_iternext */ 3120 striter_methods, /* tp_methods */ 3121 0, 3122 }; 3123 3124 static PyObject * 3125 bytes_iter(PyObject *seq) 3126 { 3127 striterobject *it; 3128 3129 if (!PyBytes_Check(seq)) { 3130 PyErr_BadInternalCall(); 3131 return NULL; 3132 } 3133 it = PyObject_GC_New(striterobject, &PyBytesIter_Type); 3134 if (it == NULL) 3135 return NULL; 3136 it->it_index = 0; 3137 Py_INCREF(seq); 3138 it->it_seq = (PyBytesObject *)seq; 3139 _PyObject_GC_TRACK(it); 3140 return (PyObject *)it; 3141 } 3142 3143 3144 /* _PyBytesWriter API */ 3145 3146 #ifdef MS_WINDOWS 3147 /* On Windows, overallocate by 50% is the best factor */ 3148 # define OVERALLOCATE_FACTOR 2 3149 #else 3150 /* On Linux, overallocate by 25% is the best factor */ 3151 # define OVERALLOCATE_FACTOR 4 3152 #endif 3153 3154 void 3155 _PyBytesWriter_Init(_PyBytesWriter *writer) 3156 { 3157 /* Set all attributes before small_buffer to 0 */ 3158 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer)); 3159 #ifdef Py_DEBUG 3160 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer)); 3161 #endif 3162 } 3163 3164 void 3165 _PyBytesWriter_Dealloc(_PyBytesWriter *writer) 3166 { 3167 Py_CLEAR(writer->buffer); 3168 } 3169 3170 Py_LOCAL_INLINE(char*) 3171 _PyBytesWriter_AsString(_PyBytesWriter *writer) 3172 { 3173 if (writer->use_small_buffer) { 3174 assert(writer->buffer == NULL); 3175 return writer->small_buffer; 3176 } 3177 else if (writer->use_bytearray) { 3178 assert(writer->buffer != NULL); 3179 return PyByteArray_AS_STRING(writer->buffer); 3180 } 3181 else { 3182 assert(writer->buffer != NULL); 3183 return PyBytes_AS_STRING(writer->buffer); 3184 } 3185 } 3186 3187 Py_LOCAL_INLINE(Py_ssize_t) 3188 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str) 3189 { 3190 char *start = _PyBytesWriter_AsString(writer); 3191 assert(str != NULL); 3192 assert(str >= start); 3193 assert(str - start <= writer->allocated); 3194 return str - start; 3195 } 3196 3197 Py_LOCAL_INLINE(void) 3198 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str) 3199 { 3200 #ifdef Py_DEBUG 3201 char *start, *end; 3202 3203 if (writer->use_small_buffer) { 3204 assert(writer->buffer == NULL); 3205 } 3206 else { 3207 assert(writer->buffer != NULL); 3208 if (writer->use_bytearray) 3209 assert(PyByteArray_CheckExact(writer->buffer)); 3210 else 3211 assert(PyBytes_CheckExact(writer->buffer)); 3212 assert(Py_REFCNT(writer->buffer) == 1); 3213 } 3214 3215 if (writer->use_bytearray) { 3216 /* bytearray has its own overallocation algorithm, 3217 writer overallocation must be disabled */ 3218 assert(!writer->overallocate); 3219 } 3220 3221 assert(0 <= writer->allocated); 3222 assert(0 <= writer->min_size && writer->min_size <= writer->allocated); 3223 /* the last byte must always be null */ 3224 start = _PyBytesWriter_AsString(writer); 3225 assert(start[writer->allocated] == 0); 3226 3227 end = start + writer->allocated; 3228 assert(str != NULL); 3229 assert(start <= str && str <= end); 3230 #endif 3231 } 3232 3233 void* 3234 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size) 3235 { 3236 Py_ssize_t allocated, pos; 3237 3238 _PyBytesWriter_CheckConsistency(writer, str); 3239 assert(writer->allocated < size); 3240 3241 allocated = size; 3242 if (writer->overallocate 3243 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) { 3244 /* overallocate to limit the number of realloc() */ 3245 allocated += allocated / OVERALLOCATE_FACTOR; 3246 } 3247 3248 pos = _PyBytesWriter_GetSize(writer, str); 3249 if (!writer->use_small_buffer) { 3250 if (writer->use_bytearray) { 3251 if (PyByteArray_Resize(writer->buffer, allocated)) 3252 goto error; 3253 /* writer->allocated can be smaller than writer->buffer->ob_alloc, 3254 but we cannot use ob_alloc because bytes may need to be moved 3255 to use the whole buffer. bytearray uses an internal optimization 3256 to avoid moving or copying bytes when bytes are removed at the 3257 beginning (ex: del bytearray[:1]). */ 3258 } 3259 else { 3260 if (_PyBytes_Resize(&writer->buffer, allocated)) 3261 goto error; 3262 } 3263 } 3264 else { 3265 /* convert from stack buffer to bytes object buffer */ 3266 assert(writer->buffer == NULL); 3267 3268 if (writer->use_bytearray) 3269 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated); 3270 else 3271 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); 3272 if (writer->buffer == NULL) 3273 goto error; 3274 3275 if (pos != 0) { 3276 char *dest; 3277 if (writer->use_bytearray) 3278 dest = PyByteArray_AS_STRING(writer->buffer); 3279 else 3280 dest = PyBytes_AS_STRING(writer->buffer); 3281 memcpy(dest, 3282 writer->small_buffer, 3283 pos); 3284 } 3285 3286 writer->use_small_buffer = 0; 3287 #ifdef Py_DEBUG 3288 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer)); 3289 #endif 3290 } 3291 writer->allocated = allocated; 3292 3293 str = _PyBytesWriter_AsString(writer) + pos; 3294 _PyBytesWriter_CheckConsistency(writer, str); 3295 return str; 3296 3297 error: 3298 _PyBytesWriter_Dealloc(writer); 3299 return NULL; 3300 } 3301 3302 void* 3303 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size) 3304 { 3305 Py_ssize_t new_min_size; 3306 3307 _PyBytesWriter_CheckConsistency(writer, str); 3308 assert(size >= 0); 3309 3310 if (size == 0) { 3311 /* nothing to do */ 3312 return str; 3313 } 3314 3315 if (writer->min_size > PY_SSIZE_T_MAX - size) { 3316 PyErr_NoMemory(); 3317 _PyBytesWriter_Dealloc(writer); 3318 return NULL; 3319 } 3320 new_min_size = writer->min_size + size; 3321 3322 if (new_min_size > writer->allocated) 3323 str = _PyBytesWriter_Resize(writer, str, new_min_size); 3324 3325 writer->min_size = new_min_size; 3326 return str; 3327 } 3328 3329 /* Allocate the buffer to write size bytes. 3330 Return the pointer to the beginning of buffer data. 3331 Raise an exception and return NULL on error. */ 3332 void* 3333 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size) 3334 { 3335 /* ensure that _PyBytesWriter_Alloc() is only called once */ 3336 assert(writer->min_size == 0 && writer->buffer == NULL); 3337 assert(size >= 0); 3338 3339 writer->use_small_buffer = 1; 3340 #ifdef Py_DEBUG 3341 writer->allocated = sizeof(writer->small_buffer) - 1; 3342 /* In debug mode, don't use the full small buffer because it is less 3343 efficient than bytes and bytearray objects to detect buffer underflow 3344 and buffer overflow. Use 10 bytes of the small buffer to test also 3345 code using the smaller buffer in debug mode. 3346 3347 Don't modify the _PyBytesWriter structure (use a shorter small buffer) 3348 in debug mode to also be able to detect stack overflow when running 3349 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes), 3350 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a 3351 stack overflow. */ 3352 writer->allocated = Py_MIN(writer->allocated, 10); 3353 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0, 3354 to detect buffer overflow */ 3355 writer->small_buffer[writer->allocated] = 0; 3356 #else 3357 writer->allocated = sizeof(writer->small_buffer); 3358 #endif 3359 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size); 3360 } 3361 3362 PyObject * 3363 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str) 3364 { 3365 Py_ssize_t size; 3366 PyObject *result; 3367 3368 _PyBytesWriter_CheckConsistency(writer, str); 3369 3370 size = _PyBytesWriter_GetSize(writer, str); 3371 if (size == 0 && !writer->use_bytearray) { 3372 Py_CLEAR(writer->buffer); 3373 /* Get the empty byte string singleton */ 3374 result = PyBytes_FromStringAndSize(NULL, 0); 3375 } 3376 else if (writer->use_small_buffer) { 3377 if (writer->use_bytearray) { 3378 result = PyByteArray_FromStringAndSize(writer->small_buffer, size); 3379 } 3380 else { 3381 result = PyBytes_FromStringAndSize(writer->small_buffer, size); 3382 } 3383 } 3384 else { 3385 result = writer->buffer; 3386 writer->buffer = NULL; 3387 3388 if (size != writer->allocated) { 3389 if (writer->use_bytearray) { 3390 if (PyByteArray_Resize(result, size)) { 3391 Py_DECREF(result); 3392 return NULL; 3393 } 3394 } 3395 else { 3396 if (_PyBytes_Resize(&result, size)) { 3397 assert(result == NULL); 3398 return NULL; 3399 } 3400 } 3401 } 3402 } 3403 return result; 3404 } 3405 3406 void* 3407 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr, 3408 const void *bytes, Py_ssize_t size) 3409 { 3410 char *str = (char *)ptr; 3411 3412 str = _PyBytesWriter_Prepare(writer, str, size); 3413 if (str == NULL) 3414 return NULL; 3415 3416 memcpy(str, bytes, size); 3417 str += size; 3418 3419 return str; 3420 } 3421