Home | History | Annotate | Download | only in Objects
      1 /* bytes object implementation */
      2 
      3 #define PY_SSIZE_T_CLEAN
      4 
      5 #include "Python.h"
      6 #include "internal/mem.h"
      7 #include "internal/pystate.h"
      8 
      9 #include "bytes_methods.h"
     10 #include "pystrhex.h"
     11 #include <stddef.h>
     12 
     13 /*[clinic input]
     14 class bytes "PyBytesObject *" "&PyBytes_Type"
     15 [clinic start generated code]*/
     16 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
     17 
     18 #include "clinic/bytesobject.c.h"
     19 
#ifdef COUNT_ALLOCS
/* Allocation statistics: null_strings counts how often the cached empty
   bytes object was handed out, one_strings how often a cached single-byte
   object was handed out. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of single-byte bytes objects, indexed by the byte value; an entry
   stays NULL until an object for that byte has been created. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* The shared empty bytes object, or NULL if it has not been created yet. */
static PyBytesObject *nullstring;
     26 
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   It is the offset of the ob_sval member plus one byte; the allocators in
   this file use that extra byte for the terminating null character stored
   at ob_sval[n].

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
     34 
/* Forward declaration.  The definition is not part of the code reviewed
   here; presumably it appears further down in this file together with the
   rest of the _PyBytesWriter helpers -- TODO confirm. */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                   char *str);
     38 
     39 /*
     40    For PyBytes_FromString(), the parameter `str' points to a null-terminated
     41    string containing exactly `size' bytes.
     42 
     43    For PyBytes_FromStringAndSize(), the parameter `str' is
     44    either NULL or else points to a string containing at least `size' bytes.
     45    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
     46    not have to be null-terminated.  (Therefore it is safe to construct a
     47    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
     48    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
     49    bytes (setting the last byte to the null terminating character) and you can
     50    fill in the data yourself.  If `str' is non-NULL then the resulting
     51    PyBytes object must be treated as immutable and you must not fill in nor
     52    alter the data yourself, since the strings may be shared.
     53 
     54    The PyObject member `op->ob_size', which denotes the number of "extra
     55    items" in a variable-size object, will contain the number of bytes
     56    allocated for string data, not counting the null terminating character.
     57    It is therefore equal to the `size' parameter (for
     58    PyBytes_FromStringAndSize()) or the length of the string in the `str'
     59    parameter (for PyBytes_FromString()).
     60 */
     61 static PyObject *
     62 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
     63 {
     64     PyBytesObject *op;
     65     assert(size >= 0);
     66 
     67     if (size == 0 && (op = nullstring) != NULL) {
     68 #ifdef COUNT_ALLOCS
     69         null_strings++;
     70 #endif
     71         Py_INCREF(op);
     72         return (PyObject *)op;
     73     }
     74 
     75     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
     76         PyErr_SetString(PyExc_OverflowError,
     77                         "byte string is too large");
     78         return NULL;
     79     }
     80 
     81     /* Inline PyObject_NewVar */
     82     if (use_calloc)
     83         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
     84     else
     85         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
     86     if (op == NULL)
     87         return PyErr_NoMemory();
     88     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
     89     op->ob_shash = -1;
     90     if (!use_calloc)
     91         op->ob_sval[size] = '\0';
     92     /* empty byte string singleton */
     93     if (size == 0) {
     94         nullstring = op;
     95         Py_INCREF(op);
     96     }
     97     return (PyObject *) op;
     98 }
     99 
    100 PyObject *
    101 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
    102 {
    103     PyBytesObject *op;
    104     if (size < 0) {
    105         PyErr_SetString(PyExc_SystemError,
    106             "Negative size passed to PyBytes_FromStringAndSize");
    107         return NULL;
    108     }
    109     if (size == 1 && str != NULL &&
    110         (op = characters[*str & UCHAR_MAX]) != NULL)
    111     {
    112 #ifdef COUNT_ALLOCS
    113         one_strings++;
    114 #endif
    115         Py_INCREF(op);
    116         return (PyObject *)op;
    117     }
    118 
    119     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
    120     if (op == NULL)
    121         return NULL;
    122     if (str == NULL)
    123         return (PyObject *) op;
    124 
    125     memcpy(op->ob_sval, str, size);
    126     /* share short strings */
    127     if (size == 1) {
    128         characters[*str & UCHAR_MAX] = op;
    129         Py_INCREF(op);
    130     }
    131     return (PyObject *) op;
    132 }
    133 
    134 PyObject *
    135 PyBytes_FromString(const char *str)
    136 {
    137     size_t size;
    138     PyBytesObject *op;
    139 
    140     assert(str != NULL);
    141     size = strlen(str);
    142     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
    143         PyErr_SetString(PyExc_OverflowError,
    144             "byte string is too long");
    145         return NULL;
    146     }
    147     if (size == 0 && (op = nullstring) != NULL) {
    148 #ifdef COUNT_ALLOCS
    149         null_strings++;
    150 #endif
    151         Py_INCREF(op);
    152         return (PyObject *)op;
    153     }
    154     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
    155 #ifdef COUNT_ALLOCS
    156         one_strings++;
    157 #endif
    158         Py_INCREF(op);
    159         return (PyObject *)op;
    160     }
    161 
    162     /* Inline PyObject_NewVar */
    163     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
    164     if (op == NULL)
    165         return PyErr_NoMemory();
    166     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
    167     op->ob_shash = -1;
    168     memcpy(op->ob_sval, str, size+1);
    169     /* share short strings */
    170     if (size == 0) {
    171         nullstring = op;
    172         Py_INCREF(op);
    173     } else if (size == 1) {
    174         characters[*str & UCHAR_MAX] = op;
    175         Py_INCREF(op);
    176     }
    177     return (PyObject *) op;
    178 }
    179 
    180 PyObject *
    181 PyBytes_FromFormatV(const char *format, va_list vargs)
    182 {
    183     char *s;
    184     const char *f;
    185     const char *p;
    186     Py_ssize_t prec;
    187     int longflag;
    188     int size_tflag;
    189     /* Longest 64-bit formatted numbers:
    190        - "18446744073709551615\0" (21 bytes)
    191        - "-9223372036854775808\0" (21 bytes)
    192        Decimal takes the most space (it isn't enough for octal.)
    193 
    194        Longest 64-bit pointer representation:
    195        "0xffffffffffffffff\0" (19 bytes). */
    196     char buffer[21];
    197     _PyBytesWriter writer;
    198 
    199     _PyBytesWriter_Init(&writer);
    200 
    201     s = _PyBytesWriter_Alloc(&writer, strlen(format));
    202     if (s == NULL)
    203         return NULL;
    204     writer.overallocate = 1;
    205 
    206 #define WRITE_BYTES(str) \
    207     do { \
    208         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
    209         if (s == NULL) \
    210             goto error; \
    211     } while (0)
    212 
    213     for (f = format; *f; f++) {
    214         if (*f != '%') {
    215             *s++ = *f;
    216             continue;
    217         }
    218 
    219         p = f++;
    220 
    221         /* ignore the width (ex: 10 in "%10s") */
    222         while (Py_ISDIGIT(*f))
    223             f++;
    224 
    225         /* parse the precision (ex: 10 in "%.10s") */
    226         prec = 0;
    227         if (*f == '.') {
    228             f++;
    229             for (; Py_ISDIGIT(*f); f++) {
    230                 prec = (prec * 10) + (*f - '0');
    231             }
    232         }
    233 
    234         while (*f && *f != '%' && !Py_ISALPHA(*f))
    235             f++;
    236 
    237         /* handle the long flag ('l'), but only for %ld and %lu.
    238            others can be added when necessary. */
    239         longflag = 0;
    240         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
    241             longflag = 1;
    242             ++f;
    243         }
    244 
    245         /* handle the size_t flag ('z'). */
    246         size_tflag = 0;
    247         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
    248             size_tflag = 1;
    249             ++f;
    250         }
    251 
    252         /* subtract bytes preallocated for the format string
    253            (ex: 2 for "%s") */
    254         writer.min_size -= (f - p + 1);
    255 
    256         switch (*f) {
    257         case 'c':
    258         {
    259             int c = va_arg(vargs, int);
    260             if (c < 0 || c > 255) {
    261                 PyErr_SetString(PyExc_OverflowError,
    262                                 "PyBytes_FromFormatV(): %c format "
    263                                 "expects an integer in range [0; 255]");
    264                 goto error;
    265             }
    266             writer.min_size++;
    267             *s++ = (unsigned char)c;
    268             break;
    269         }
    270 
    271         case 'd':
    272             if (longflag)
    273                 sprintf(buffer, "%ld", va_arg(vargs, long));
    274             else if (size_tflag)
    275                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
    276                     va_arg(vargs, Py_ssize_t));
    277             else
    278                 sprintf(buffer, "%d", va_arg(vargs, int));
    279             assert(strlen(buffer) < sizeof(buffer));
    280             WRITE_BYTES(buffer);
    281             break;
    282 
    283         case 'u':
    284             if (longflag)
    285                 sprintf(buffer, "%lu",
    286                     va_arg(vargs, unsigned long));
    287             else if (size_tflag)
    288                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
    289                     va_arg(vargs, size_t));
    290             else
    291                 sprintf(buffer, "%u",
    292                     va_arg(vargs, unsigned int));
    293             assert(strlen(buffer) < sizeof(buffer));
    294             WRITE_BYTES(buffer);
    295             break;
    296 
    297         case 'i':
    298             sprintf(buffer, "%i", va_arg(vargs, int));
    299             assert(strlen(buffer) < sizeof(buffer));
    300             WRITE_BYTES(buffer);
    301             break;
    302 
    303         case 'x':
    304             sprintf(buffer, "%x", va_arg(vargs, int));
    305             assert(strlen(buffer) < sizeof(buffer));
    306             WRITE_BYTES(buffer);
    307             break;
    308 
    309         case 's':
    310         {
    311             Py_ssize_t i;
    312 
    313             p = va_arg(vargs, const char*);
    314             if (prec <= 0) {
    315                 i = strlen(p);
    316             }
    317             else {
    318                 i = 0;
    319                 while (i < prec && p[i]) {
    320                     i++;
    321                 }
    322             }
    323             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
    324             if (s == NULL)
    325                 goto error;
    326             break;
    327         }
    328 
    329         case 'p':
    330             sprintf(buffer, "%p", va_arg(vargs, void*));
    331             assert(strlen(buffer) < sizeof(buffer));
    332             /* %p is ill-defined:  ensure leading 0x. */
    333             if (buffer[1] == 'X')
    334                 buffer[1] = 'x';
    335             else if (buffer[1] != 'x') {
    336                 memmove(buffer+2, buffer, strlen(buffer)+1);
    337                 buffer[0] = '0';
    338                 buffer[1] = 'x';
    339             }
    340             WRITE_BYTES(buffer);
    341             break;
    342 
    343         case '%':
    344             writer.min_size++;
    345             *s++ = '%';
    346             break;
    347 
    348         default:
    349             if (*f == 0) {
    350                 /* fix min_size if we reached the end of the format string */
    351                 writer.min_size++;
    352             }
    353 
    354             /* invalid format string: copy unformatted string and exit */
    355             WRITE_BYTES(p);
    356             return _PyBytesWriter_Finish(&writer, s);
    357         }
    358     }
    359 
    360 #undef WRITE_BYTES
    361 
    362     return _PyBytesWriter_Finish(&writer, s);
    363 
    364  error:
    365     _PyBytesWriter_Dealloc(&writer);
    366     return NULL;
    367 }
    368 
    369 PyObject *
    370 PyBytes_FromFormat(const char *format, ...)
    371 {
    372     PyObject* ret;
    373     va_list vargs;
    374 
    375 #ifdef HAVE_STDARG_PROTOTYPES
    376     va_start(vargs, format);
    377 #else
    378     va_start(vargs);
    379 #endif
    380     ret = PyBytes_FromFormatV(format, vargs);
    381     va_end(vargs);
    382     return ret;
    383 }
    384 
    385 /* Helpers for formatstring */
    386 
    387 Py_LOCAL_INLINE(PyObject *)
    388 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
    389 {
    390     Py_ssize_t argidx = *p_argidx;
    391     if (argidx < arglen) {
    392         (*p_argidx)++;
    393         if (arglen < 0)
    394             return args;
    395         else
    396             return PyTuple_GetItem(args, argidx);
    397     }
    398     PyErr_SetString(PyExc_TypeError,
    399                     "not enough arguments for format string");
    400     return NULL;
    401 }
    402 
/* Format codes
 *
 * Bit flags collected while the flag characters of a '%' conversion are
 * parsed; each macro corresponds to the flag character listed beside it.
 * A negative '*' width also sets F_LJUST.
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
    415 
    416 /* Returns a new reference to a PyBytes object, or NULL on failure. */
    417 
    418 static char*
    419 formatfloat(PyObject *v, int flags, int prec, int type,
    420             PyObject **p_result, _PyBytesWriter *writer, char *str)
    421 {
    422     char *p;
    423     PyObject *result;
    424     double x;
    425     size_t len;
    426 
    427     x = PyFloat_AsDouble(v);
    428     if (x == -1.0 && PyErr_Occurred()) {
    429         PyErr_Format(PyExc_TypeError, "float argument required, "
    430                      "not %.200s", Py_TYPE(v)->tp_name);
    431         return NULL;
    432     }
    433 
    434     if (prec < 0)
    435         prec = 6;
    436 
    437     p = PyOS_double_to_string(x, type, prec,
    438                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
    439 
    440     if (p == NULL)
    441         return NULL;
    442 
    443     len = strlen(p);
    444     if (writer != NULL) {
    445         str = _PyBytesWriter_Prepare(writer, str, len);
    446         if (str == NULL)
    447             return NULL;
    448         memcpy(str, p, len);
    449         PyMem_Free(p);
    450         str += len;
    451         return str;
    452     }
    453 
    454     result = PyBytes_FromStringAndSize(p, len);
    455     PyMem_Free(p);
    456     *p_result = result;
    457     return result != NULL ? str : NULL;
    458 }
    459 
    460 static PyObject *
    461 formatlong(PyObject *v, int flags, int prec, int type)
    462 {
    463     PyObject *result, *iobj;
    464     if (type == 'i')
    465         type = 'd';
    466     if (PyLong_Check(v))
    467         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    468     if (PyNumber_Check(v)) {
    469         /* make sure number is a type of integer for o, x, and X */
    470         if (type == 'o' || type == 'x' || type == 'X')
    471             iobj = PyNumber_Index(v);
    472         else
    473             iobj = PyNumber_Long(v);
    474         if (iobj == NULL) {
    475             if (!PyErr_ExceptionMatches(PyExc_TypeError))
    476                 return NULL;
    477         }
    478         else if (!PyLong_Check(iobj))
    479             Py_CLEAR(iobj);
    480         if (iobj != NULL) {
    481             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
    482             Py_DECREF(iobj);
    483             return result;
    484         }
    485     }
    486     PyErr_Format(PyExc_TypeError,
    487         "%%%c format: %s is required, not %.200s", type,
    488         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
    489                                                     : "a number",
    490         Py_TYPE(v)->tp_name);
    491     return NULL;
    492 }
    493 
    494 static int
    495 byte_converter(PyObject *arg, char *p)
    496 {
    497     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
    498         *p = PyBytes_AS_STRING(arg)[0];
    499         return 1;
    500     }
    501     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
    502         *p = PyByteArray_AS_STRING(arg)[0];
    503         return 1;
    504     }
    505     else {
    506         PyObject *iobj;
    507         long ival;
    508         int overflow;
    509         /* make sure number is a type of integer */
    510         if (PyLong_Check(arg)) {
    511             ival = PyLong_AsLongAndOverflow(arg, &overflow);
    512         }
    513         else {
    514             iobj = PyNumber_Index(arg);
    515             if (iobj == NULL) {
    516                 if (!PyErr_ExceptionMatches(PyExc_TypeError))
    517                     return 0;
    518                 goto onError;
    519             }
    520             ival = PyLong_AsLongAndOverflow(iobj, &overflow);
    521             Py_DECREF(iobj);
    522         }
    523         if (!overflow && ival == -1 && PyErr_Occurred())
    524             goto onError;
    525         if (overflow || !(0 <= ival && ival <= 255)) {
    526             PyErr_SetString(PyExc_OverflowError,
    527                             "%c arg not in range(256)");
    528             return 0;
    529         }
    530         *p = (char)ival;
    531         return 1;
    532     }
    533   onError:
    534     PyErr_SetString(PyExc_TypeError,
    535         "%c requires an integer in range(256) or a single byte");
    536     return 0;
    537 }
    538 
/* Forward declaration: format_obj() below calls this for objects that
   support the buffer protocol.  The definition is not part of the code
   reviewed here. */
static PyObject *_PyBytes_FromBuffer(PyObject *x);
    540 
    541 static PyObject *
    542 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
    543 {
    544     PyObject *func, *result;
    545     _Py_IDENTIFIER(__bytes__);
    546     /* is it a bytes object? */
    547     if (PyBytes_Check(v)) {
    548         *pbuf = PyBytes_AS_STRING(v);
    549         *plen = PyBytes_GET_SIZE(v);
    550         Py_INCREF(v);
    551         return v;
    552     }
    553     if (PyByteArray_Check(v)) {
    554         *pbuf = PyByteArray_AS_STRING(v);
    555         *plen = PyByteArray_GET_SIZE(v);
    556         Py_INCREF(v);
    557         return v;
    558     }
    559     /* does it support __bytes__? */
    560     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
    561     if (func != NULL) {
    562         result = _PyObject_CallNoArg(func);
    563         Py_DECREF(func);
    564         if (result == NULL)
    565             return NULL;
    566         if (!PyBytes_Check(result)) {
    567             PyErr_Format(PyExc_TypeError,
    568                          "__bytes__ returned non-bytes (type %.200s)",
    569                          Py_TYPE(result)->tp_name);
    570             Py_DECREF(result);
    571             return NULL;
    572         }
    573         *pbuf = PyBytes_AS_STRING(result);
    574         *plen = PyBytes_GET_SIZE(result);
    575         return result;
    576     }
    577     /* does it support buffer protocol? */
    578     if (PyObject_CheckBuffer(v)) {
    579         /* maybe we can avoid making a copy of the buffer object here? */
    580         result = _PyBytes_FromBuffer(v);
    581         if (result == NULL)
    582             return NULL;
    583         *pbuf = PyBytes_AS_STRING(result);
    584         *plen = PyBytes_GET_SIZE(result);
    585         return result;
    586     }
    587     PyErr_Format(PyExc_TypeError,
    588                  "%%b requires a bytes-like object, "
    589                  "or an object that implements __bytes__, not '%.100s'",
    590                  Py_TYPE(v)->tp_name);
    591     return NULL;
    592 }
    593 
    594 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
    595 
    596 PyObject *
    597 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
    598                   PyObject *args, int use_bytearray)
    599 {
    600     const char *fmt;
    601     char *res;
    602     Py_ssize_t arglen, argidx;
    603     Py_ssize_t fmtcnt;
    604     int args_owned = 0;
    605     PyObject *dict = NULL;
    606     _PyBytesWriter writer;
    607 
    608     if (args == NULL) {
    609         PyErr_BadInternalCall();
    610         return NULL;
    611     }
    612     fmt = format;
    613     fmtcnt = format_len;
    614 
    615     _PyBytesWriter_Init(&writer);
    616     writer.use_bytearray = use_bytearray;
    617 
    618     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
    619     if (res == NULL)
    620         return NULL;
    621     if (!use_bytearray)
    622         writer.overallocate = 1;
    623 
    624     if (PyTuple_Check(args)) {
    625         arglen = PyTuple_GET_SIZE(args);
    626         argidx = 0;
    627     }
    628     else {
    629         arglen = -1;
    630         argidx = -2;
    631     }
    632     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
    633         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
    634         !PyByteArray_Check(args)) {
    635             dict = args;
    636     }
    637 
    638     while (--fmtcnt >= 0) {
    639         if (*fmt != '%') {
    640             Py_ssize_t len;
    641             char *pos;
    642 
    643             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
    644             if (pos != NULL)
    645                 len = pos - fmt;
    646             else
    647                 len = fmtcnt + 1;
    648             assert(len != 0);
    649 
    650             memcpy(res, fmt, len);
    651             res += len;
    652             fmt += len;
    653             fmtcnt -= (len - 1);
    654         }
    655         else {
    656             /* Got a format specifier */
    657             int flags = 0;
    658             Py_ssize_t width = -1;
    659             int prec = -1;
    660             int c = '\0';
    661             int fill;
    662             PyObject *v = NULL;
    663             PyObject *temp = NULL;
    664             const char *pbuf = NULL;
    665             int sign;
    666             Py_ssize_t len = 0;
    667             char onechar; /* For byte_converter() */
    668             Py_ssize_t alloc;
    669 #ifdef Py_DEBUG
    670             char *before;
    671 #endif
    672 
    673             fmt++;
    674             if (*fmt == '%') {
    675                 *res++ = '%';
    676                 fmt++;
    677                 fmtcnt--;
    678                 continue;
    679             }
    680             if (*fmt == '(') {
    681                 const char *keystart;
    682                 Py_ssize_t keylen;
    683                 PyObject *key;
    684                 int pcount = 1;
    685 
    686                 if (dict == NULL) {
    687                     PyErr_SetString(PyExc_TypeError,
    688                              "format requires a mapping");
    689                     goto error;
    690                 }
    691                 ++fmt;
    692                 --fmtcnt;
    693                 keystart = fmt;
    694                 /* Skip over balanced parentheses */
    695                 while (pcount > 0 && --fmtcnt >= 0) {
    696                     if (*fmt == ')')
    697                         --pcount;
    698                     else if (*fmt == '(')
    699                         ++pcount;
    700                     fmt++;
    701                 }
    702                 keylen = fmt - keystart - 1;
    703                 if (fmtcnt < 0 || pcount > 0) {
    704                     PyErr_SetString(PyExc_ValueError,
    705                                "incomplete format key");
    706                     goto error;
    707                 }
    708                 key = PyBytes_FromStringAndSize(keystart,
    709                                                  keylen);
    710                 if (key == NULL)
    711                     goto error;
    712                 if (args_owned) {
    713                     Py_DECREF(args);
    714                     args_owned = 0;
    715                 }
    716                 args = PyObject_GetItem(dict, key);
    717                 Py_DECREF(key);
    718                 if (args == NULL) {
    719                     goto error;
    720                 }
    721                 args_owned = 1;
    722                 arglen = -1;
    723                 argidx = -2;
    724             }
    725 
    726             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
    727             while (--fmtcnt >= 0) {
    728                 switch (c = *fmt++) {
    729                 case '-': flags |= F_LJUST; continue;
    730                 case '+': flags |= F_SIGN; continue;
    731                 case ' ': flags |= F_BLANK; continue;
    732                 case '#': flags |= F_ALT; continue;
    733                 case '0': flags |= F_ZERO; continue;
    734                 }
    735                 break;
    736             }
    737 
    738             /* Parse width. Example: "%10s" => width=10 */
    739             if (c == '*') {
    740                 v = getnextarg(args, arglen, &argidx);
    741                 if (v == NULL)
    742                     goto error;
    743                 if (!PyLong_Check(v)) {
    744                     PyErr_SetString(PyExc_TypeError,
    745                                     "* wants int");
    746                     goto error;
    747                 }
    748                 width = PyLong_AsSsize_t(v);
    749                 if (width == -1 && PyErr_Occurred())
    750                     goto error;
    751                 if (width < 0) {
    752                     flags |= F_LJUST;
    753                     width = -width;
    754                 }
    755                 if (--fmtcnt >= 0)
    756                     c = *fmt++;
    757             }
    758             else if (c >= 0 && isdigit(c)) {
    759                 width = c - '0';
    760                 while (--fmtcnt >= 0) {
    761                     c = Py_CHARMASK(*fmt++);
    762                     if (!isdigit(c))
    763                         break;
    764                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
    765                         PyErr_SetString(
    766                             PyExc_ValueError,
    767                             "width too big");
    768                         goto error;
    769                     }
    770                     width = width*10 + (c - '0');
    771                 }
    772             }
    773 
    774             /* Parse precision. Example: "%.3f" => prec=3 */
    775             if (c == '.') {
    776                 prec = 0;
    777                 if (--fmtcnt >= 0)
    778                     c = *fmt++;
    779                 if (c == '*') {
    780                     v = getnextarg(args, arglen, &argidx);
    781                     if (v == NULL)
    782                         goto error;
    783                     if (!PyLong_Check(v)) {
    784                         PyErr_SetString(
    785                             PyExc_TypeError,
    786                             "* wants int");
    787                         goto error;
    788                     }
    789                     prec = _PyLong_AsInt(v);
    790                     if (prec == -1 && PyErr_Occurred())
    791                         goto error;
    792                     if (prec < 0)
    793                         prec = 0;
    794                     if (--fmtcnt >= 0)
    795                         c = *fmt++;
    796                 }
    797                 else if (c >= 0 && isdigit(c)) {
    798                     prec = c - '0';
    799                     while (--fmtcnt >= 0) {
    800                         c = Py_CHARMASK(*fmt++);
    801                         if (!isdigit(c))
    802                             break;
    803                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
    804                             PyErr_SetString(
    805                                 PyExc_ValueError,
    806                                 "prec too big");
    807                             goto error;
    808                         }
    809                         prec = prec*10 + (c - '0');
    810                     }
    811                 }
    812             } /* prec */
    813             if (fmtcnt >= 0) {
    814                 if (c == 'h' || c == 'l' || c == 'L') {
    815                     if (--fmtcnt >= 0)
    816                         c = *fmt++;
    817                 }
    818             }
    819             if (fmtcnt < 0) {
    820                 PyErr_SetString(PyExc_ValueError,
    821                                 "incomplete format");
    822                 goto error;
    823             }
    824             v = getnextarg(args, arglen, &argidx);
    825             if (v == NULL)
    826                 goto error;
    827 
    828             if (fmtcnt == 0) {
    829                 /* last write: disable writer overallocation */
    830                 writer.overallocate = 0;
    831             }
    832 
    833             sign = 0;
    834             fill = ' ';
    835             switch (c) {
    836             case 'r':
    837                 // %r is only for 2/3 code; 3 only code should use %a
    838             case 'a':
    839                 temp = PyObject_ASCII(v);
    840                 if (temp == NULL)
    841                     goto error;
    842                 assert(PyUnicode_IS_ASCII(temp));
    843                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
    844                 len = PyUnicode_GET_LENGTH(temp);
    845                 if (prec >= 0 && len > prec)
    846                     len = prec;
    847                 break;
    848 
    849             case 's':
    850                 // %s is only for 2/3 code; 3 only code should use %b
    851             case 'b':
    852                 temp = format_obj(v, &pbuf, &len);
    853                 if (temp == NULL)
    854                     goto error;
    855                 if (prec >= 0 && len > prec)
    856                     len = prec;
    857                 break;
    858 
    859             case 'i':
    860             case 'd':
    861             case 'u':
    862             case 'o':
    863             case 'x':
    864             case 'X':
    865                 if (PyLong_CheckExact(v)
    866                     && width == -1 && prec == -1
    867                     && !(flags & (F_SIGN | F_BLANK))
    868                     && c != 'X')
    869                 {
    870                     /* Fast path */
    871                     int alternate = flags & F_ALT;
    872                     int base;
    873 
    874                     switch(c)
    875                     {
    876                         default:
    877                             Py_UNREACHABLE();
    878                         case 'd':
    879                         case 'i':
    880                         case 'u':
    881                             base = 10;
    882                             break;
    883                         case 'o':
    884                             base = 8;
    885                             break;
    886                         case 'x':
    887                         case 'X':
    888                             base = 16;
    889                             break;
    890                     }
    891 
    892                     /* Fast path */
    893                     writer.min_size -= 2; /* size preallocated for "%d" */
    894                     res = _PyLong_FormatBytesWriter(&writer, res,
    895                                                     v, base, alternate);
    896                     if (res == NULL)
    897                         goto error;
    898                     continue;
    899                 }
    900 
    901                 temp = formatlong(v, flags, prec, c);
    902                 if (!temp)
    903                     goto error;
    904                 assert(PyUnicode_IS_ASCII(temp));
    905                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
    906                 len = PyUnicode_GET_LENGTH(temp);
    907                 sign = 1;
    908                 if (flags & F_ZERO)
    909                     fill = '0';
    910                 break;
    911 
    912             case 'e':
    913             case 'E':
    914             case 'f':
    915             case 'F':
    916             case 'g':
    917             case 'G':
    918                 if (width == -1 && prec == -1
    919                     && !(flags & (F_SIGN | F_BLANK)))
    920                 {
    921                     /* Fast path */
    922                     writer.min_size -= 2; /* size preallocated for "%f" */
    923                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
    924                     if (res == NULL)
    925                         goto error;
    926                     continue;
    927                 }
    928 
    929                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
    930                     goto error;
    931                 pbuf = PyBytes_AS_STRING(temp);
    932                 len = PyBytes_GET_SIZE(temp);
    933                 sign = 1;
    934                 if (flags & F_ZERO)
    935                     fill = '0';
    936                 break;
    937 
    938             case 'c':
    939                 pbuf = &onechar;
    940                 len = byte_converter(v, &onechar);
    941                 if (!len)
    942                     goto error;
    943                 if (width == -1) {
    944                     /* Fast path */
    945                     *res++ = onechar;
    946                     continue;
    947                 }
    948                 break;
    949 
    950             default:
    951                 PyErr_Format(PyExc_ValueError,
    952                   "unsupported format character '%c' (0x%x) "
    953                   "at index %zd",
    954                   c, c,
    955                   (Py_ssize_t)(fmt - 1 - format));
    956                 goto error;
    957             }
    958 
    959             if (sign) {
    960                 if (*pbuf == '-' || *pbuf == '+') {
    961                     sign = *pbuf++;
    962                     len--;
    963                 }
    964                 else if (flags & F_SIGN)
    965                     sign = '+';
    966                 else if (flags & F_BLANK)
    967                     sign = ' ';
    968                 else
    969                     sign = 0;
    970             }
    971             if (width < len)
    972                 width = len;
    973 
    974             alloc = width;
    975             if (sign != 0 && len == width)
    976                 alloc++;
    977             /* 2: size preallocated for %s */
    978             if (alloc > 2) {
    979                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
    980                 if (res == NULL)
    981                     goto error;
    982             }
    983 #ifdef Py_DEBUG
    984             before = res;
    985 #endif
    986 
    987             /* Write the sign if needed */
    988             if (sign) {
    989                 if (fill != ' ')
    990                     *res++ = sign;
    991                 if (width > len)
    992                     width--;
    993             }
    994 
    995             /* Write the numeric prefix for "x", "X" and "o" formats
    996                if the alternate form is used.
    997                For example, write "0x" for the "%#x" format. */
    998             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
    999                 assert(pbuf[0] == '0');
   1000                 assert(pbuf[1] == c);
   1001                 if (fill != ' ') {
   1002                     *res++ = *pbuf++;
   1003                     *res++ = *pbuf++;
   1004                 }
   1005                 width -= 2;
   1006                 if (width < 0)
   1007                     width = 0;
   1008                 len -= 2;
   1009             }
   1010 
   1011             /* Pad left with the fill character if needed */
   1012             if (width > len && !(flags & F_LJUST)) {
   1013                 memset(res, fill, width - len);
   1014                 res += (width - len);
   1015                 width = len;
   1016             }
   1017 
   1018             /* If padding with spaces: write sign if needed and/or numeric
   1019                prefix if the alternate form is used */
   1020             if (fill == ' ') {
   1021                 if (sign)
   1022                     *res++ = sign;
   1023                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
   1024                     assert(pbuf[0] == '0');
   1025                     assert(pbuf[1] == c);
   1026                     *res++ = *pbuf++;
   1027                     *res++ = *pbuf++;
   1028                 }
   1029             }
   1030 
   1031             /* Copy bytes */
   1032             memcpy(res, pbuf, len);
   1033             res += len;
   1034 
   1035             /* Pad right with the fill character if needed */
   1036             if (width > len) {
   1037                 memset(res, ' ', width - len);
   1038                 res += (width - len);
   1039             }
   1040 
   1041             if (dict && (argidx < arglen)) {
   1042                 PyErr_SetString(PyExc_TypeError,
   1043                            "not all arguments converted during bytes formatting");
   1044                 Py_XDECREF(temp);
   1045                 goto error;
   1046             }
   1047             Py_XDECREF(temp);
   1048 
   1049 #ifdef Py_DEBUG
   1050             /* check that we computed the exact size for this write */
   1051             assert((res - before) == alloc);
   1052 #endif
   1053         } /* '%' */
   1054 
   1055         /* If overallocation was disabled, ensure that it was the last
   1056            write. Otherwise, we missed an optimization */
   1057         assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
   1058     } /* until end */
   1059 
   1060     if (argidx < arglen && !dict) {
   1061         PyErr_SetString(PyExc_TypeError,
   1062                         "not all arguments converted during bytes formatting");
   1063         goto error;
   1064     }
   1065 
   1066     if (args_owned) {
   1067         Py_DECREF(args);
   1068     }
   1069     return _PyBytesWriter_Finish(&writer, res);
   1070 
   1071  error:
   1072     _PyBytesWriter_Dealloc(&writer);
   1073     if (args_owned) {
   1074         Py_DECREF(args);
   1075     }
   1076     return NULL;
   1077 }
   1078 
   1079 /* =-= */
   1080 
   1081 static void
   1082 bytes_dealloc(PyObject *op)
   1083 {
   1084     Py_TYPE(op)->tp_free(op);
   1085 }
   1086 
   1087 /* Unescape a backslash-escaped string. If unicode is non-zero,
   1088    the string is a u-literal. If recode_encoding is non-zero,
   1089    the string is UTF-8 encoded and should be re-encoded in the
   1090    specified encoding.  */
   1091 
   1092 static char *
   1093 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
   1094                             const char *errors, const char *recode_encoding,
   1095                             _PyBytesWriter *writer, char *p)
   1096 {
   1097     PyObject *u, *w;
   1098     const char* t;
   1099 
   1100     t = *s;
   1101     /* Decode non-ASCII bytes as UTF-8. */
   1102     while (t < end && (*t & 0x80))
   1103         t++;
   1104     u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
   1105     if (u == NULL)
   1106         return NULL;
   1107 
   1108     /* Recode them in target encoding. */
   1109     w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
   1110     Py_DECREF(u);
   1111     if  (w == NULL)
   1112         return NULL;
   1113     assert(PyBytes_Check(w));
   1114 
   1115     /* Append bytes to output buffer. */
   1116     writer->min_size--;   /* subtract 1 preallocated byte */
   1117     p = _PyBytesWriter_WriteBytes(writer, p,
   1118                                   PyBytes_AS_STRING(w),
   1119                                   PyBytes_GET_SIZE(w));
   1120     Py_DECREF(w);
   1121     if (p == NULL)
   1122         return NULL;
   1123 
   1124     *s = t;
   1125     return p;
   1126 }
   1127 
   1128 PyObject *_PyBytes_DecodeEscape(const char *s,
   1129                                 Py_ssize_t len,
   1130                                 const char *errors,
   1131                                 Py_ssize_t unicode,
   1132                                 const char *recode_encoding,
   1133                                 const char **first_invalid_escape)
   1134 {
   1135     int c;
   1136     char *p;
   1137     const char *end;
   1138     _PyBytesWriter writer;
   1139 
   1140     _PyBytesWriter_Init(&writer);
   1141 
   1142     p = _PyBytesWriter_Alloc(&writer, len);
   1143     if (p == NULL)
   1144         return NULL;
   1145     writer.overallocate = 1;
   1146 
   1147     *first_invalid_escape = NULL;
   1148 
   1149     end = s + len;
   1150     while (s < end) {
   1151         if (*s != '\\') {
   1152           non_esc:
   1153             if (!(recode_encoding && (*s & 0x80))) {
   1154                 *p++ = *s++;
   1155             }
   1156             else {
   1157                 /* non-ASCII character and need to recode */
   1158                 p = _PyBytes_DecodeEscapeRecode(&s, end,
   1159                                                 errors, recode_encoding,
   1160                                                 &writer, p);
   1161                 if (p == NULL)
   1162                     goto failed;
   1163             }
   1164             continue;
   1165         }
   1166 
   1167         s++;
   1168         if (s == end) {
   1169             PyErr_SetString(PyExc_ValueError,
   1170                             "Trailing \\ in string");
   1171             goto failed;
   1172         }
   1173 
   1174         switch (*s++) {
   1175         /* XXX This assumes ASCII! */
   1176         case '\n': break;
   1177         case '\\': *p++ = '\\'; break;
   1178         case '\'': *p++ = '\''; break;
   1179         case '\"': *p++ = '\"'; break;
   1180         case 'b': *p++ = '\b'; break;
   1181         case 'f': *p++ = '\014'; break; /* FF */
   1182         case 't': *p++ = '\t'; break;
   1183         case 'n': *p++ = '\n'; break;
   1184         case 'r': *p++ = '\r'; break;
   1185         case 'v': *p++ = '\013'; break; /* VT */
   1186         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
   1187         case '0': case '1': case '2': case '3':
   1188         case '4': case '5': case '6': case '7':
   1189             c = s[-1] - '0';
   1190             if (s < end && '0' <= *s && *s <= '7') {
   1191                 c = (c<<3) + *s++ - '0';
   1192                 if (s < end && '0' <= *s && *s <= '7')
   1193                     c = (c<<3) + *s++ - '0';
   1194             }
   1195             *p++ = c;
   1196             break;
   1197         case 'x':
   1198             if (s+1 < end) {
   1199                 int digit1, digit2;
   1200                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
   1201                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
   1202                 if (digit1 < 16 && digit2 < 16) {
   1203                     *p++ = (unsigned char)((digit1 << 4) + digit2);
   1204                     s += 2;
   1205                     break;
   1206                 }
   1207             }
   1208             /* invalid hexadecimal digits */
   1209 
   1210             if (!errors || strcmp(errors, "strict") == 0) {
   1211                 PyErr_Format(PyExc_ValueError,
   1212                              "invalid \\x escape at position %d",
   1213                              s - 2 - (end - len));
   1214                 goto failed;
   1215             }
   1216             if (strcmp(errors, "replace") == 0) {
   1217                 *p++ = '?';
   1218             } else if (strcmp(errors, "ignore") == 0)
   1219                 /* do nothing */;
   1220             else {
   1221                 PyErr_Format(PyExc_ValueError,
   1222                              "decoding error; unknown "
   1223                              "error handling code: %.400s",
   1224                              errors);
   1225                 goto failed;
   1226             }
   1227             /* skip \x */
   1228             if (s < end && Py_ISXDIGIT(s[0]))
   1229                 s++; /* and a hexdigit */
   1230             break;
   1231 
   1232         default:
   1233             if (*first_invalid_escape == NULL) {
   1234                 *first_invalid_escape = s-1; /* Back up one char, since we've
   1235                                                 already incremented s. */
   1236             }
   1237             *p++ = '\\';
   1238             s--;
   1239             goto non_esc; /* an arbitrary number of unescaped
   1240                              UTF-8 bytes may follow. */
   1241         }
   1242     }
   1243 
   1244     return _PyBytesWriter_Finish(&writer, p);
   1245 
   1246   failed:
   1247     _PyBytesWriter_Dealloc(&writer);
   1248     return NULL;
   1249 }
   1250 
   1251 PyObject *PyBytes_DecodeEscape(const char *s,
   1252                                 Py_ssize_t len,
   1253                                 const char *errors,
   1254                                 Py_ssize_t unicode,
   1255                                 const char *recode_encoding)
   1256 {
   1257     const char* first_invalid_escape;
   1258     PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
   1259                                              recode_encoding,
   1260                                              &first_invalid_escape);
   1261     if (result == NULL)
   1262         return NULL;
   1263     if (first_invalid_escape != NULL) {
   1264         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
   1265                              "invalid escape sequence '\\%c'",
   1266                              (unsigned char)*first_invalid_escape) < 0) {
   1267             Py_DECREF(result);
   1268             return NULL;
   1269         }
   1270     }
   1271     return result;
   1272 
   1273 }
   1274 /* -------------------------------------------------------------------- */
   1275 /* object api */
   1276 
   1277 Py_ssize_t
   1278 PyBytes_Size(PyObject *op)
   1279 {
   1280     if (!PyBytes_Check(op)) {
   1281         PyErr_Format(PyExc_TypeError,
   1282              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
   1283         return -1;
   1284     }
   1285     return Py_SIZE(op);
   1286 }
   1287 
   1288 char *
   1289 PyBytes_AsString(PyObject *op)
   1290 {
   1291     if (!PyBytes_Check(op)) {
   1292         PyErr_Format(PyExc_TypeError,
   1293              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
   1294         return NULL;
   1295     }
   1296     return ((PyBytesObject *)op)->ob_sval;
   1297 }
   1298 
   1299 int
   1300 PyBytes_AsStringAndSize(PyObject *obj,
   1301                          char **s,
   1302                          Py_ssize_t *len)
   1303 {
   1304     if (s == NULL) {
   1305         PyErr_BadInternalCall();
   1306         return -1;
   1307     }
   1308 
   1309     if (!PyBytes_Check(obj)) {
   1310         PyErr_Format(PyExc_TypeError,
   1311              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
   1312         return -1;
   1313     }
   1314 
   1315     *s = PyBytes_AS_STRING(obj);
   1316     if (len != NULL)
   1317         *len = PyBytes_GET_SIZE(obj);
   1318     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
   1319         PyErr_SetString(PyExc_ValueError,
   1320                         "embedded null byte");
   1321         return -1;
   1322     }
   1323     return 0;
   1324 }
   1325 
   1326 /* -------------------------------------------------------------------- */
   1327 /* Methods */
   1328 
   1329 #include "stringlib/stringdefs.h"
   1330 
   1331 #include "stringlib/fastsearch.h"
   1332 #include "stringlib/count.h"
   1333 #include "stringlib/find.h"
   1334 #include "stringlib/join.h"
   1335 #include "stringlib/partition.h"
   1336 #include "stringlib/split.h"
   1337 #include "stringlib/ctype.h"
   1338 
   1339 #include "stringlib/transmogrify.h"
   1340 
   1341 PyObject *
   1342 PyBytes_Repr(PyObject *obj, int smartquotes)
   1343 {
   1344     PyBytesObject* op = (PyBytesObject*) obj;
   1345     Py_ssize_t i, length = Py_SIZE(op);
   1346     Py_ssize_t newsize, squotes, dquotes;
   1347     PyObject *v;
   1348     unsigned char quote, *s, *p;
   1349 
   1350     /* Compute size of output string */
   1351     squotes = dquotes = 0;
   1352     newsize = 3; /* b'' */
   1353     s = (unsigned char*)op->ob_sval;
   1354     for (i = 0; i < length; i++) {
   1355         Py_ssize_t incr = 1;
   1356         switch(s[i]) {
   1357         case '\'': squotes++; break;
   1358         case '"':  dquotes++; break;
   1359         case '\\': case '\t': case '\n': case '\r':
   1360             incr = 2; break; /* \C */
   1361         default:
   1362             if (s[i] < ' ' || s[i] >= 0x7f)
   1363                 incr = 4; /* \xHH */
   1364         }
   1365         if (newsize > PY_SSIZE_T_MAX - incr)
   1366             goto overflow;
   1367         newsize += incr;
   1368     }
   1369     quote = '\'';
   1370     if (smartquotes && squotes && !dquotes)
   1371         quote = '"';
   1372     if (squotes && quote == '\'') {
   1373         if (newsize > PY_SSIZE_T_MAX - squotes)
   1374             goto overflow;
   1375         newsize += squotes;
   1376     }
   1377 
   1378     v = PyUnicode_New(newsize, 127);
   1379     if (v == NULL) {
   1380         return NULL;
   1381     }
   1382     p = PyUnicode_1BYTE_DATA(v);
   1383 
   1384     *p++ = 'b', *p++ = quote;
   1385     for (i = 0; i < length; i++) {
   1386         unsigned char c = op->ob_sval[i];
   1387         if (c == quote || c == '\\')
   1388             *p++ = '\\', *p++ = c;
   1389         else if (c == '\t')
   1390             *p++ = '\\', *p++ = 't';
   1391         else if (c == '\n')
   1392             *p++ = '\\', *p++ = 'n';
   1393         else if (c == '\r')
   1394             *p++ = '\\', *p++ = 'r';
   1395         else if (c < ' ' || c >= 0x7f) {
   1396             *p++ = '\\';
   1397             *p++ = 'x';
   1398             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
   1399             *p++ = Py_hexdigits[c & 0xf];
   1400         }
   1401         else
   1402             *p++ = c;
   1403     }
   1404     *p++ = quote;
   1405     assert(_PyUnicode_CheckConsistency(v, 1));
   1406     return v;
   1407 
   1408   overflow:
   1409     PyErr_SetString(PyExc_OverflowError,
   1410                     "bytes object is too large to make repr");
   1411     return NULL;
   1412 }
   1413 
   1414 static PyObject *
   1415 bytes_repr(PyObject *op)
   1416 {
   1417     return PyBytes_Repr(op, 1);
   1418 }
   1419 
   1420 static PyObject *
   1421 bytes_str(PyObject *op)
   1422 {
   1423     if (Py_BytesWarningFlag) {
   1424         if (PyErr_WarnEx(PyExc_BytesWarning,
   1425                          "str() on a bytes instance", 1))
   1426             return NULL;
   1427     }
   1428     return bytes_repr(op);
   1429 }
   1430 
   1431 static Py_ssize_t
   1432 bytes_length(PyBytesObject *a)
   1433 {
   1434     return Py_SIZE(a);
   1435 }
   1436 
   1437 /* This is also used by PyBytes_Concat() */
   1438 static PyObject *
   1439 bytes_concat(PyObject *a, PyObject *b)
   1440 {
   1441     Py_buffer va, vb;
   1442     PyObject *result = NULL;
   1443 
   1444     va.len = -1;
   1445     vb.len = -1;
   1446     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
   1447         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
   1448         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
   1449                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
   1450         goto done;
   1451     }
   1452 
   1453     /* Optimize end cases */
   1454     if (va.len == 0 && PyBytes_CheckExact(b)) {
   1455         result = b;
   1456         Py_INCREF(result);
   1457         goto done;
   1458     }
   1459     if (vb.len == 0 && PyBytes_CheckExact(a)) {
   1460         result = a;
   1461         Py_INCREF(result);
   1462         goto done;
   1463     }
   1464 
   1465     if (va.len > PY_SSIZE_T_MAX - vb.len) {
   1466         PyErr_NoMemory();
   1467         goto done;
   1468     }
   1469 
   1470     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
   1471     if (result != NULL) {
   1472         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
   1473         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
   1474     }
   1475 
   1476   done:
   1477     if (va.len != -1)
   1478         PyBuffer_Release(&va);
   1479     if (vb.len != -1)
   1480         PyBuffer_Release(&vb);
   1481     return result;
   1482 }
   1483 
   1484 static PyObject *
   1485 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
   1486 {
   1487     Py_ssize_t i;
   1488     Py_ssize_t j;
   1489     Py_ssize_t size;
   1490     PyBytesObject *op;
   1491     size_t nbytes;
   1492     if (n < 0)
   1493         n = 0;
   1494     /* watch out for overflows:  the size can overflow int,
   1495      * and the # of bytes needed can overflow size_t
   1496      */
   1497     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
   1498         PyErr_SetString(PyExc_OverflowError,
   1499             "repeated bytes are too long");
   1500         return NULL;
   1501     }
   1502     size = Py_SIZE(a) * n;
   1503     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
   1504         Py_INCREF(a);
   1505         return (PyObject *)a;
   1506     }
   1507     nbytes = (size_t)size;
   1508     if (nbytes + PyBytesObject_SIZE <= nbytes) {
   1509         PyErr_SetString(PyExc_OverflowError,
   1510             "repeated bytes are too long");
   1511         return NULL;
   1512     }
   1513     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
   1514     if (op == NULL)
   1515         return PyErr_NoMemory();
   1516     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
   1517     op->ob_shash = -1;
   1518     op->ob_sval[size] = '\0';
   1519     if (Py_SIZE(a) == 1 && n > 0) {
   1520         memset(op->ob_sval, a->ob_sval[0] , n);
   1521         return (PyObject *) op;
   1522     }
   1523     i = 0;
   1524     if (i < size) {
   1525         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
   1526         i = Py_SIZE(a);
   1527     }
   1528     while (i < size) {
   1529         j = (i <= size-i)  ?  i  :  size-i;
   1530         memcpy(op->ob_sval+i, op->ob_sval, j);
   1531         i += j;
   1532     }
   1533     return (PyObject *) op;
   1534 }
   1535 
   1536 static int
   1537 bytes_contains(PyObject *self, PyObject *arg)
   1538 {
   1539     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
   1540 }
   1541 
   1542 static PyObject *
   1543 bytes_item(PyBytesObject *a, Py_ssize_t i)
   1544 {
   1545     if (i < 0 || i >= Py_SIZE(a)) {
   1546         PyErr_SetString(PyExc_IndexError, "index out of range");
   1547         return NULL;
   1548     }
   1549     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
   1550 }
   1551 
   1552 static int
   1553 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
   1554 {
   1555     int cmp;
   1556     Py_ssize_t len;
   1557 
   1558     len = Py_SIZE(a);
   1559     if (Py_SIZE(b) != len)
   1560         return 0;
   1561 
   1562     if (a->ob_sval[0] != b->ob_sval[0])
   1563         return 0;
   1564 
   1565     cmp = memcmp(a->ob_sval, b->ob_sval, len);
   1566     return (cmp == 0);
   1567 }
   1568 
   1569 static PyObject*
   1570 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
   1571 {
   1572     int c;
   1573     Py_ssize_t len_a, len_b;
   1574     Py_ssize_t min_len;
   1575     int rc;
   1576 
   1577     /* Make sure both arguments are strings. */
   1578     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
   1579         if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
   1580             rc = PyObject_IsInstance((PyObject*)a,
   1581                                      (PyObject*)&PyUnicode_Type);
   1582             if (!rc)
   1583                 rc = PyObject_IsInstance((PyObject*)b,
   1584                                          (PyObject*)&PyUnicode_Type);
   1585             if (rc < 0)
   1586                 return NULL;
   1587             if (rc) {
   1588                 if (PyErr_WarnEx(PyExc_BytesWarning,
   1589                                  "Comparison between bytes and string", 1))
   1590                     return NULL;
   1591             }
   1592             else {
   1593                 rc = PyObject_IsInstance((PyObject*)a,
   1594                                          (PyObject*)&PyLong_Type);
   1595                 if (!rc)
   1596                     rc = PyObject_IsInstance((PyObject*)b,
   1597                                              (PyObject*)&PyLong_Type);
   1598                 if (rc < 0)
   1599                     return NULL;
   1600                 if (rc) {
   1601                     if (PyErr_WarnEx(PyExc_BytesWarning,
   1602                                      "Comparison between bytes and int", 1))
   1603                         return NULL;
   1604                 }
   1605             }
   1606         }
   1607         Py_RETURN_NOTIMPLEMENTED;
   1608     }
   1609     else if (a == b) {
   1610         switch (op) {
   1611         case Py_EQ:
   1612         case Py_LE:
   1613         case Py_GE:
   1614             /* a string is equal to itself */
   1615             Py_RETURN_TRUE;
   1616             break;
   1617         case Py_NE:
   1618         case Py_LT:
   1619         case Py_GT:
   1620             Py_RETURN_FALSE;
   1621             break;
   1622         default:
   1623             PyErr_BadArgument();
   1624             return NULL;
   1625         }
   1626     }
   1627     else if (op == Py_EQ || op == Py_NE) {
   1628         int eq = bytes_compare_eq(a, b);
   1629         eq ^= (op == Py_NE);
   1630         return PyBool_FromLong(eq);
   1631     }
   1632     else {
   1633         len_a = Py_SIZE(a);
   1634         len_b = Py_SIZE(b);
   1635         min_len = Py_MIN(len_a, len_b);
   1636         if (min_len > 0) {
   1637             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
   1638             if (c == 0)
   1639                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
   1640         }
   1641         else
   1642             c = 0;
   1643         if (c != 0)
   1644             Py_RETURN_RICHCOMPARE(c, 0, op);
   1645         Py_RETURN_RICHCOMPARE(len_a, len_b, op);
   1646     }
   1647 }
   1648 
   1649 static Py_hash_t
   1650 bytes_hash(PyBytesObject *a)
   1651 {
   1652     if (a->ob_shash == -1) {
   1653         /* Can't fail */
   1654         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
   1655     }
   1656     return a->ob_shash;
   1657 }
   1658 
   1659 static PyObject*
   1660 bytes_subscript(PyBytesObject* self, PyObject* item)
   1661 {
   1662     if (PyIndex_Check(item)) {
   1663         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
   1664         if (i == -1 && PyErr_Occurred())
   1665             return NULL;
   1666         if (i < 0)
   1667             i += PyBytes_GET_SIZE(self);
   1668         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
   1669             PyErr_SetString(PyExc_IndexError,
   1670                             "index out of range");
   1671             return NULL;
   1672         }
   1673         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
   1674     }
   1675     else if (PySlice_Check(item)) {
   1676         Py_ssize_t start, stop, step, slicelength, cur, i;
   1677         char* source_buf;
   1678         char* result_buf;
   1679         PyObject* result;
   1680 
   1681         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
   1682             return NULL;
   1683         }
   1684         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
   1685                                             &stop, step);
   1686 
   1687         if (slicelength <= 0) {
   1688             return PyBytes_FromStringAndSize("", 0);
   1689         }
   1690         else if (start == 0 && step == 1 &&
   1691                  slicelength == PyBytes_GET_SIZE(self) &&
   1692                  PyBytes_CheckExact(self)) {
   1693             Py_INCREF(self);
   1694             return (PyObject *)self;
   1695         }
   1696         else if (step == 1) {
   1697             return PyBytes_FromStringAndSize(
   1698                 PyBytes_AS_STRING(self) + start,
   1699                 slicelength);
   1700         }
   1701         else {
   1702             source_buf = PyBytes_AS_STRING(self);
   1703             result = PyBytes_FromStringAndSize(NULL, slicelength);
   1704             if (result == NULL)
   1705                 return NULL;
   1706 
   1707             result_buf = PyBytes_AS_STRING(result);
   1708             for (cur = start, i = 0; i < slicelength;
   1709                  cur += step, i++) {
   1710                 result_buf[i] = source_buf[cur];
   1711             }
   1712 
   1713             return result;
   1714         }
   1715     }
   1716     else {
   1717         PyErr_Format(PyExc_TypeError,
   1718                      "byte indices must be integers or slices, not %.200s",
   1719                      Py_TYPE(item)->tp_name);
   1720         return NULL;
   1721     }
   1722 }
   1723 
   1724 static int
   1725 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
   1726 {
   1727     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
   1728                              1, flags);
   1729 }
   1730 
   1731 static PySequenceMethods bytes_as_sequence = {
   1732     (lenfunc)bytes_length, /*sq_length*/
   1733     (binaryfunc)bytes_concat, /*sq_concat*/
   1734     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
   1735     (ssizeargfunc)bytes_item, /*sq_item*/
   1736     0,                  /*sq_slice*/
   1737     0,                  /*sq_ass_item*/
   1738     0,                  /*sq_ass_slice*/
   1739     (objobjproc)bytes_contains /*sq_contains*/
   1740 };
   1741 
   1742 static PyMappingMethods bytes_as_mapping = {
   1743     (lenfunc)bytes_length,
   1744     (binaryfunc)bytes_subscript,
   1745     0,
   1746 };
   1747 
   1748 static PyBufferProcs bytes_as_buffer = {
   1749     (getbufferproc)bytes_buffer_getbuffer,
   1750     NULL,
   1751 };
   1752 
   1753 
/* Values for the `striptype' argument of do_xstrip(), do_strip() and
   do_argstrip(): they select which end(s) of the bytes object are trimmed. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
   1757 
   1758 /*[clinic input]
   1759 bytes.split
   1760 
   1761     sep: object = None
   1762         The delimiter according which to split the bytes.
   1763         None (the default value) means split on ASCII whitespace characters
   1764         (space, tab, return, newline, formfeed, vertical tab).
   1765     maxsplit: Py_ssize_t = -1
   1766         Maximum number of splits to do.
   1767         -1 (the default value) means no limit.
   1768 
   1769 Return a list of the sections in the bytes, using sep as the delimiter.
   1770 [clinic start generated code]*/
   1771 
   1772 static PyObject *
   1773 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
   1774 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
   1775 {
   1776     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
   1777     const char *s = PyBytes_AS_STRING(self), *sub;
   1778     Py_buffer vsub;
   1779     PyObject *list;
   1780 
   1781     if (maxsplit < 0)
   1782         maxsplit = PY_SSIZE_T_MAX;
   1783     if (sep == Py_None)
   1784         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
   1785     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
   1786         return NULL;
   1787     sub = vsub.buf;
   1788     n = vsub.len;
   1789 
   1790     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
   1791     PyBuffer_Release(&vsub);
   1792     return list;
   1793 }
   1794 
   1795 /*[clinic input]
   1796 bytes.partition
   1797 
   1798     sep: Py_buffer
   1799     /
   1800 
   1801 Partition the bytes into three parts using the given separator.
   1802 
   1803 This will search for the separator sep in the bytes. If the separator is found,
   1804 returns a 3-tuple containing the part before the separator, the separator
   1805 itself, and the part after it.
   1806 
   1807 If the separator is not found, returns a 3-tuple containing the original bytes
   1808 object and two empty bytes objects.
   1809 [clinic start generated code]*/
   1810 
   1811 static PyObject *
   1812 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
   1813 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
   1814 {
   1815     return stringlib_partition(
   1816         (PyObject*) self,
   1817         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
   1818         sep->obj, (const char *)sep->buf, sep->len
   1819         );
   1820 }
   1821 
   1822 /*[clinic input]
   1823 bytes.rpartition
   1824 
   1825     sep: Py_buffer
   1826     /
   1827 
   1828 Partition the bytes into three parts using the given separator.
   1829 
   1830 This will search for the separator sep in the bytes, starting at the end. If
   1831 the separator is found, returns a 3-tuple containing the part before the
   1832 separator, the separator itself, and the part after it.
   1833 
   1834 If the separator is not found, returns a 3-tuple containing two empty bytes
   1835 objects and the original bytes object.
   1836 [clinic start generated code]*/
   1837 
   1838 static PyObject *
   1839 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
   1840 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
   1841 {
   1842     return stringlib_rpartition(
   1843         (PyObject*) self,
   1844         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
   1845         sep->obj, (const char *)sep->buf, sep->len
   1846         );
   1847 }
   1848 
   1849 /*[clinic input]
   1850 bytes.rsplit = bytes.split
   1851 
   1852 Return a list of the sections in the bytes, using sep as the delimiter.
   1853 
   1854 Splitting is done starting at the end of the bytes and working to the front.
   1855 [clinic start generated code]*/
   1856 
   1857 static PyObject *
   1858 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
   1859 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
   1860 {
   1861     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
   1862     const char *s = PyBytes_AS_STRING(self), *sub;
   1863     Py_buffer vsub;
   1864     PyObject *list;
   1865 
   1866     if (maxsplit < 0)
   1867         maxsplit = PY_SSIZE_T_MAX;
   1868     if (sep == Py_None)
   1869         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
   1870     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
   1871         return NULL;
   1872     sub = vsub.buf;
   1873     n = vsub.len;
   1874 
   1875     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
   1876     PyBuffer_Release(&vsub);
   1877     return list;
   1878 }
   1879 
   1880 
   1881 /*[clinic input]
   1882 bytes.join
   1883 
   1884     iterable_of_bytes: object
   1885     /
   1886 
   1887 Concatenate any number of bytes objects.
   1888 
   1889 The bytes whose method is called is inserted in between each pair.
   1890 
   1891 The result is returned as a new bytes object.
   1892 
   1893 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
   1894 [clinic start generated code]*/
   1895 
   1896 static PyObject *
   1897 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
   1898 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
   1899 {
   1900     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
   1901 }
   1902 
   1903 PyObject *
   1904 _PyBytes_Join(PyObject *sep, PyObject *x)
   1905 {
   1906     assert(sep != NULL && PyBytes_Check(sep));
   1907     assert(x != NULL);
   1908     return bytes_join((PyBytesObject*)sep, x);
   1909 }
   1910 
   1911 static PyObject *
   1912 bytes_find(PyBytesObject *self, PyObject *args)
   1913 {
   1914     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1915 }
   1916 
   1917 static PyObject *
   1918 bytes_index(PyBytesObject *self, PyObject *args)
   1919 {
   1920     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1921 }
   1922 
   1923 
   1924 static PyObject *
   1925 bytes_rfind(PyBytesObject *self, PyObject *args)
   1926 {
   1927     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1928 }
   1929 
   1930 
   1931 static PyObject *
   1932 bytes_rindex(PyBytesObject *self, PyObject *args)
   1933 {
   1934     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1935 }
   1936 
   1937 
   1938 Py_LOCAL_INLINE(PyObject *)
   1939 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
   1940 {
   1941     Py_buffer vsep;
   1942     char *s = PyBytes_AS_STRING(self);
   1943     Py_ssize_t len = PyBytes_GET_SIZE(self);
   1944     char *sep;
   1945     Py_ssize_t seplen;
   1946     Py_ssize_t i, j;
   1947 
   1948     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
   1949         return NULL;
   1950     sep = vsep.buf;
   1951     seplen = vsep.len;
   1952 
   1953     i = 0;
   1954     if (striptype != RIGHTSTRIP) {
   1955         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
   1956             i++;
   1957         }
   1958     }
   1959 
   1960     j = len;
   1961     if (striptype != LEFTSTRIP) {
   1962         do {
   1963             j--;
   1964         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
   1965         j++;
   1966     }
   1967 
   1968     PyBuffer_Release(&vsep);
   1969 
   1970     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
   1971         Py_INCREF(self);
   1972         return (PyObject*)self;
   1973     }
   1974     else
   1975         return PyBytes_FromStringAndSize(s+i, j-i);
   1976 }
   1977 
   1978 
   1979 Py_LOCAL_INLINE(PyObject *)
   1980 do_strip(PyBytesObject *self, int striptype)
   1981 {
   1982     char *s = PyBytes_AS_STRING(self);
   1983     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
   1984 
   1985     i = 0;
   1986     if (striptype != RIGHTSTRIP) {
   1987         while (i < len && Py_ISSPACE(s[i])) {
   1988             i++;
   1989         }
   1990     }
   1991 
   1992     j = len;
   1993     if (striptype != LEFTSTRIP) {
   1994         do {
   1995             j--;
   1996         } while (j >= i && Py_ISSPACE(s[j]));
   1997         j++;
   1998     }
   1999 
   2000     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
   2001         Py_INCREF(self);
   2002         return (PyObject*)self;
   2003     }
   2004     else
   2005         return PyBytes_FromStringAndSize(s+i, j-i);
   2006 }
   2007 
   2008 
   2009 Py_LOCAL_INLINE(PyObject *)
   2010 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
   2011 {
   2012     if (bytes != NULL && bytes != Py_None) {
   2013         return do_xstrip(self, striptype, bytes);
   2014     }
   2015     return do_strip(self, striptype);
   2016 }
   2017 
   2018 /*[clinic input]
   2019 bytes.strip
   2020 
   2021     bytes: object = None
   2022     /
   2023 
   2024 Strip leading and trailing bytes contained in the argument.
   2025 
   2026 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
   2027 [clinic start generated code]*/
   2028 
   2029 static PyObject *
   2030 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
   2031 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
   2032 {
   2033     return do_argstrip(self, BOTHSTRIP, bytes);
   2034 }
   2035 
   2036 /*[clinic input]
   2037 bytes.lstrip
   2038 
   2039     bytes: object = None
   2040     /
   2041 
   2042 Strip leading bytes contained in the argument.
   2043 
   2044 If the argument is omitted or None, strip leading  ASCII whitespace.
   2045 [clinic start generated code]*/
   2046 
   2047 static PyObject *
   2048 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
   2049 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
   2050 {
   2051     return do_argstrip(self, LEFTSTRIP, bytes);
   2052 }
   2053 
   2054 /*[clinic input]
   2055 bytes.rstrip
   2056 
   2057     bytes: object = None
   2058     /
   2059 
   2060 Strip trailing bytes contained in the argument.
   2061 
   2062 If the argument is omitted or None, strip trailing ASCII whitespace.
   2063 [clinic start generated code]*/
   2064 
   2065 static PyObject *
   2066 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
   2067 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
   2068 {
   2069     return do_argstrip(self, RIGHTSTRIP, bytes);
   2070 }
   2071 
   2072 
   2073 static PyObject *
   2074 bytes_count(PyBytesObject *self, PyObject *args)
   2075 {
   2076     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   2077 }
   2078 
   2079 
   2080 /*[clinic input]
   2081 bytes.translate
   2082 
   2083     table: object
   2084         Translation table, which must be a bytes object of length 256.
   2085     /
   2086     delete as deletechars: object(c_default="NULL") = b''
   2087 
   2088 Return a copy with each character mapped by the given translation table.
   2089 
   2090 All characters occurring in the optional argument delete are removed.
   2091 The remaining characters are mapped through the given translation table.
   2092 [clinic start generated code]*/
   2093 
   2094 static PyObject *
   2095 bytes_translate_impl(PyBytesObject *self, PyObject *table,
   2096                      PyObject *deletechars)
   2097 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
   2098 {
   2099     char *input, *output;
   2100     Py_buffer table_view = {NULL, NULL};
   2101     Py_buffer del_table_view = {NULL, NULL};
   2102     const char *table_chars;
   2103     Py_ssize_t i, c, changed = 0;
   2104     PyObject *input_obj = (PyObject*)self;
   2105     const char *output_start, *del_table_chars=NULL;
   2106     Py_ssize_t inlen, tablen, dellen = 0;
   2107     PyObject *result;
   2108     int trans_table[256];
   2109 
   2110     if (PyBytes_Check(table)) {
   2111         table_chars = PyBytes_AS_STRING(table);
   2112         tablen = PyBytes_GET_SIZE(table);
   2113     }
   2114     else if (table == Py_None) {
   2115         table_chars = NULL;
   2116         tablen = 256;
   2117     }
   2118     else {
   2119         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
   2120             return NULL;
   2121         table_chars = table_view.buf;
   2122         tablen = table_view.len;
   2123     }
   2124 
   2125     if (tablen != 256) {
   2126         PyErr_SetString(PyExc_ValueError,
   2127           "translation table must be 256 characters long");
   2128         PyBuffer_Release(&table_view);
   2129         return NULL;
   2130     }
   2131 
   2132     if (deletechars != NULL) {
   2133         if (PyBytes_Check(deletechars)) {
   2134             del_table_chars = PyBytes_AS_STRING(deletechars);
   2135             dellen = PyBytes_GET_SIZE(deletechars);
   2136         }
   2137         else {
   2138             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
   2139                 PyBuffer_Release(&table_view);
   2140                 return NULL;
   2141             }
   2142             del_table_chars = del_table_view.buf;
   2143             dellen = del_table_view.len;
   2144         }
   2145     }
   2146     else {
   2147         del_table_chars = NULL;
   2148         dellen = 0;
   2149     }
   2150 
   2151     inlen = PyBytes_GET_SIZE(input_obj);
   2152     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
   2153     if (result == NULL) {
   2154         PyBuffer_Release(&del_table_view);
   2155         PyBuffer_Release(&table_view);
   2156         return NULL;
   2157     }
   2158     output_start = output = PyBytes_AS_STRING(result);
   2159     input = PyBytes_AS_STRING(input_obj);
   2160 
   2161     if (dellen == 0 && table_chars != NULL) {
   2162         /* If no deletions are required, use faster code */
   2163         for (i = inlen; --i >= 0; ) {
   2164             c = Py_CHARMASK(*input++);
   2165             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
   2166                 changed = 1;
   2167         }
   2168         if (!changed && PyBytes_CheckExact(input_obj)) {
   2169             Py_INCREF(input_obj);
   2170             Py_DECREF(result);
   2171             result = input_obj;
   2172         }
   2173         PyBuffer_Release(&del_table_view);
   2174         PyBuffer_Release(&table_view);
   2175         return result;
   2176     }
   2177 
   2178     if (table_chars == NULL) {
   2179         for (i = 0; i < 256; i++)
   2180             trans_table[i] = Py_CHARMASK(i);
   2181     } else {
   2182         for (i = 0; i < 256; i++)
   2183             trans_table[i] = Py_CHARMASK(table_chars[i]);
   2184     }
   2185     PyBuffer_Release(&table_view);
   2186 
   2187     for (i = 0; i < dellen; i++)
   2188         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
   2189     PyBuffer_Release(&del_table_view);
   2190 
   2191     for (i = inlen; --i >= 0; ) {
   2192         c = Py_CHARMASK(*input++);
   2193         if (trans_table[c] != -1)
   2194             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
   2195                 continue;
   2196         changed = 1;
   2197     }
   2198     if (!changed && PyBytes_CheckExact(input_obj)) {
   2199         Py_DECREF(result);
   2200         Py_INCREF(input_obj);
   2201         return input_obj;
   2202     }
   2203     /* Fix the size of the resulting string */
   2204     if (inlen > 0)
   2205         _PyBytes_Resize(&result, output - output_start);
   2206     return result;
   2207 }
   2208 
   2209 
   2210 /*[clinic input]
   2211 
   2212 @staticmethod
   2213 bytes.maketrans
   2214 
   2215     frm: Py_buffer
   2216     to: Py_buffer
   2217     /
   2218 
   2219 Return a translation table useable for the bytes or bytearray translate method.
   2220 
   2221 The returned table will be one where each byte in frm is mapped to the byte at
   2222 the same position in to.
   2223 
   2224 The bytes objects frm and to must be of the same length.
   2225 [clinic start generated code]*/
   2226 
   2227 static PyObject *
   2228 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
   2229 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
   2230 {
   2231     return _Py_bytes_maketrans(frm, to);
   2232 }
   2233 
   2234 
   2235 /*[clinic input]
   2236 bytes.replace
   2237 
   2238     old: Py_buffer
   2239     new: Py_buffer
   2240     count: Py_ssize_t = -1
   2241         Maximum number of occurrences to replace.
   2242         -1 (the default value) means replace all occurrences.
   2243     /
   2244 
   2245 Return a copy with all occurrences of substring old replaced by new.
   2246 
   2247 If the optional argument count is given, only the first count occurrences are
   2248 replaced.
   2249 [clinic start generated code]*/
   2250 
   2251 static PyObject *
   2252 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
   2253                    Py_ssize_t count)
   2254 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
   2255 {
   2256     return stringlib_replace((PyObject *)self,
   2257                              (const char *)old->buf, old->len,
   2258                              (const char *)new->buf, new->len, count);
   2259 }
   2260 
   2261 /** End DALKE **/
   2262 
   2263 
   2264 static PyObject *
   2265 bytes_startswith(PyBytesObject *self, PyObject *args)
   2266 {
   2267     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   2268 }
   2269 
   2270 static PyObject *
   2271 bytes_endswith(PyBytesObject *self, PyObject *args)
   2272 {
   2273     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   2274 }
   2275 
   2276 
   2277 /*[clinic input]
   2278 bytes.decode
   2279 
   2280     encoding: str(c_default="NULL") = 'utf-8'
   2281         The encoding with which to decode the bytes.
   2282     errors: str(c_default="NULL") = 'strict'
   2283         The error handling scheme to use for the handling of decoding errors.
   2284         The default is 'strict' meaning that decoding errors raise a
   2285         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
   2286         as well as any other name registered with codecs.register_error that
   2287         can handle UnicodeDecodeErrors.
   2288 
   2289 Decode the bytes using the codec registered for encoding.
   2290 [clinic start generated code]*/
   2291 
   2292 static PyObject *
   2293 bytes_decode_impl(PyBytesObject *self, const char *encoding,
   2294                   const char *errors)
   2295 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
   2296 {
   2297     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
   2298 }
   2299 
   2300 
   2301 /*[clinic input]
   2302 bytes.splitlines
   2303 
   2304     keepends: bool(accept={int}) = False
   2305 
   2306 Return a list of the lines in the bytes, breaking at line boundaries.
   2307 
   2308 Line breaks are not included in the resulting list unless keepends is given and
   2309 true.
   2310 [clinic start generated code]*/
   2311 
   2312 static PyObject *
   2313 bytes_splitlines_impl(PyBytesObject *self, int keepends)
   2314 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
   2315 {
   2316     return stringlib_splitlines(
   2317         (PyObject*) self, PyBytes_AS_STRING(self),
   2318         PyBytes_GET_SIZE(self), keepends
   2319         );
   2320 }
   2321 
   2322 /*[clinic input]
   2323 @classmethod
   2324 bytes.fromhex
   2325 
   2326     string: unicode
   2327     /
   2328 
   2329 Create a bytes object from a string of hexadecimal numbers.
   2330 
   2331 Spaces between two numbers are accepted.
   2332 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
   2333 [clinic start generated code]*/
   2334 
   2335 static PyObject *
   2336 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
   2337 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
   2338 {
   2339     PyObject *result = _PyBytes_FromHex(string, 0);
   2340     if (type != &PyBytes_Type && result != NULL) {
   2341         Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
   2342                                                        result, NULL));
   2343     }
   2344     return result;
   2345 }
   2346 
   2347 PyObject*
   2348 _PyBytes_FromHex(PyObject *string, int use_bytearray)
   2349 {
   2350     char *buf;
   2351     Py_ssize_t hexlen, invalid_char;
   2352     unsigned int top, bot;
   2353     Py_UCS1 *str, *end;
   2354     _PyBytesWriter writer;
   2355 
   2356     _PyBytesWriter_Init(&writer);
   2357     writer.use_bytearray = use_bytearray;
   2358 
   2359     assert(PyUnicode_Check(string));
   2360     if (PyUnicode_READY(string))
   2361         return NULL;
   2362     hexlen = PyUnicode_GET_LENGTH(string);
   2363 
   2364     if (!PyUnicode_IS_ASCII(string)) {
   2365         void *data = PyUnicode_DATA(string);
   2366         unsigned int kind = PyUnicode_KIND(string);
   2367         Py_ssize_t i;
   2368 
   2369         /* search for the first non-ASCII character */
   2370         for (i = 0; i < hexlen; i++) {
   2371             if (PyUnicode_READ(kind, data, i) >= 128)
   2372                 break;
   2373         }
   2374         invalid_char = i;
   2375         goto error;
   2376     }
   2377 
   2378     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
   2379     str = PyUnicode_1BYTE_DATA(string);
   2380 
   2381     /* This overestimates if there are spaces */
   2382     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
   2383     if (buf == NULL)
   2384         return NULL;
   2385 
   2386     end = str + hexlen;
   2387     while (str < end) {
   2388         /* skip over spaces in the input */
   2389         if (Py_ISSPACE(*str)) {
   2390             do {
   2391                 str++;
   2392             } while (Py_ISSPACE(*str));
   2393             if (str >= end)
   2394                 break;
   2395         }
   2396 
   2397         top = _PyLong_DigitValue[*str];
   2398         if (top >= 16) {
   2399             invalid_char = str - PyUnicode_1BYTE_DATA(string);
   2400             goto error;
   2401         }
   2402         str++;
   2403 
   2404         bot = _PyLong_DigitValue[*str];
   2405         if (bot >= 16) {
   2406             invalid_char = str - PyUnicode_1BYTE_DATA(string);
   2407             goto error;
   2408         }
   2409         str++;
   2410 
   2411         *buf++ = (unsigned char)((top << 4) + bot);
   2412     }
   2413 
   2414     return _PyBytesWriter_Finish(&writer, buf);
   2415 
   2416   error:
   2417     PyErr_Format(PyExc_ValueError,
   2418                  "non-hexadecimal number found in "
   2419                  "fromhex() arg at position %zd", invalid_char);
   2420     _PyBytesWriter_Dealloc(&writer);
   2421     return NULL;
   2422 }
   2423 
   2424 PyDoc_STRVAR(hex__doc__,
   2425 "B.hex() -> string\n\
   2426 \n\
   2427 Create a string of hexadecimal numbers from a bytes object.\n\
   2428 Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
   2429 
   2430 static PyObject *
   2431 bytes_hex(PyBytesObject *self)
   2432 {
   2433     char* argbuf = PyBytes_AS_STRING(self);
   2434     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
   2435     return _Py_strhex(argbuf, arglen);
   2436 }
   2437 
   2438 static PyObject *
   2439 bytes_getnewargs(PyBytesObject *v)
   2440 {
   2441     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
   2442 }
   2443 
   2444 
   2445 static PyMethodDef
   2446 bytes_methods[] = {
   2447     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
   2448     {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
   2449      _Py_capitalize__doc__},
   2450     {"center", (PyCFunction)stringlib_center, METH_VARARGS,
   2451      _Py_center__doc__},
   2452     {"count", (PyCFunction)bytes_count, METH_VARARGS,
   2453      _Py_count__doc__},
   2454     BYTES_DECODE_METHODDEF
   2455     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
   2456      _Py_endswith__doc__},
   2457     {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
   2458      _Py_expandtabs__doc__},
   2459     {"find", (PyCFunction)bytes_find, METH_VARARGS,
   2460      _Py_find__doc__},
   2461     BYTES_FROMHEX_METHODDEF
   2462     {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
   2463     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
   2464     {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
   2465      _Py_isalnum__doc__},
   2466     {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
   2467      _Py_isalpha__doc__},
   2468     {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
   2469      _Py_isascii__doc__},
   2470     {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
   2471      _Py_isdigit__doc__},
   2472     {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
   2473      _Py_islower__doc__},
   2474     {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
   2475      _Py_isspace__doc__},
   2476     {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
   2477      _Py_istitle__doc__},
   2478     {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
   2479      _Py_isupper__doc__},
   2480     BYTES_JOIN_METHODDEF
   2481     {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
   2482     {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
   2483     BYTES_LSTRIP_METHODDEF
   2484     BYTES_MAKETRANS_METHODDEF
   2485     BYTES_PARTITION_METHODDEF
   2486     BYTES_REPLACE_METHODDEF
   2487     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
   2488     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
   2489     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
   2490     BYTES_RPARTITION_METHODDEF
   2491     BYTES_RSPLIT_METHODDEF
   2492     BYTES_RSTRIP_METHODDEF
   2493     BYTES_SPLIT_METHODDEF
   2494     BYTES_SPLITLINES_METHODDEF
   2495     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
   2496      _Py_startswith__doc__},
   2497     BYTES_STRIP_METHODDEF
   2498     {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
   2499      _Py_swapcase__doc__},
   2500     {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
   2501     BYTES_TRANSLATE_METHODDEF
   2502     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
   2503     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
   2504     {NULL,     NULL}                         /* sentinel */
   2505 };
   2506 
   2507 static PyObject *
   2508 bytes_mod(PyObject *self, PyObject *arg)
   2509 {
   2510     if (!PyBytes_Check(self)) {
   2511         Py_RETURN_NOTIMPLEMENTED;
   2512     }
   2513     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
   2514                              arg, 0);
   2515 }
   2516 
   2517 static PyNumberMethods bytes_as_number = {
   2518     0,              /*nb_add*/
   2519     0,              /*nb_subtract*/
   2520     0,              /*nb_multiply*/
   2521     bytes_mod,      /*nb_remainder*/
   2522 };
   2523 
   2524 static PyObject *
   2525 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
   2526 
   2527 static PyObject *
   2528 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   2529 {
   2530     PyObject *x = NULL;
   2531     const char *encoding = NULL;
   2532     const char *errors = NULL;
   2533     PyObject *new = NULL;
   2534     PyObject *func;
   2535     Py_ssize_t size;
   2536     static char *kwlist[] = {"source", "encoding", "errors", 0};
   2537     _Py_IDENTIFIER(__bytes__);
   2538 
   2539     if (type != &PyBytes_Type)
   2540         return bytes_subtype_new(type, args, kwds);
   2541     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
   2542                                      &encoding, &errors))
   2543         return NULL;
   2544     if (x == NULL) {
   2545         if (encoding != NULL || errors != NULL) {
   2546             PyErr_SetString(PyExc_TypeError,
   2547                             "encoding or errors without sequence "
   2548                             "argument");
   2549             return NULL;
   2550         }
   2551         return PyBytes_FromStringAndSize(NULL, 0);
   2552     }
   2553 
   2554     if (encoding != NULL) {
   2555         /* Encode via the codec registry */
   2556         if (!PyUnicode_Check(x)) {
   2557             PyErr_SetString(PyExc_TypeError,
   2558                             "encoding without a string argument");
   2559             return NULL;
   2560         }
   2561         new = PyUnicode_AsEncodedString(x, encoding, errors);
   2562         if (new == NULL)
   2563             return NULL;
   2564         assert(PyBytes_Check(new));
   2565         return new;
   2566     }
   2567 
   2568     if (errors != NULL) {
   2569         PyErr_SetString(PyExc_TypeError,
   2570                         PyUnicode_Check(x) ?
   2571                         "string argument without an encoding" :
   2572                         "errors without a string argument");
   2573         return NULL;
   2574     }
   2575 
   2576     /* We'd like to call PyObject_Bytes here, but we need to check for an
   2577        integer argument before deferring to PyBytes_FromObject, something
   2578        PyObject_Bytes doesn't do. */
   2579     func = _PyObject_LookupSpecial(x, &PyId___bytes__);
   2580     if (func != NULL) {
   2581         new = _PyObject_CallNoArg(func);
   2582         Py_DECREF(func);
   2583         if (new == NULL)
   2584             return NULL;
   2585         if (!PyBytes_Check(new)) {
   2586             PyErr_Format(PyExc_TypeError,
   2587                          "__bytes__ returned non-bytes (type %.200s)",
   2588                          Py_TYPE(new)->tp_name);
   2589             Py_DECREF(new);
   2590             return NULL;
   2591         }
   2592         return new;
   2593     }
   2594     else if (PyErr_Occurred())
   2595         return NULL;
   2596 
   2597     if (PyUnicode_Check(x)) {
   2598         PyErr_SetString(PyExc_TypeError,
   2599                         "string argument without an encoding");
   2600         return NULL;
   2601     }
   2602     /* Is it an integer? */
   2603     if (PyIndex_Check(x)) {
   2604         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
   2605         if (size == -1 && PyErr_Occurred()) {
   2606             if (!PyErr_ExceptionMatches(PyExc_TypeError))
   2607                 return NULL;
   2608             PyErr_Clear();  /* fall through */
   2609         }
   2610         else {
   2611             if (size < 0) {
   2612                 PyErr_SetString(PyExc_ValueError, "negative count");
   2613                 return NULL;
   2614             }
   2615             new = _PyBytes_FromSize(size, 1);
   2616             if (new == NULL)
   2617                 return NULL;
   2618             return new;
   2619         }
   2620     }
   2621 
   2622     return PyBytes_FromObject(x);
   2623 }
   2624 
   2625 static PyObject*
   2626 _PyBytes_FromBuffer(PyObject *x)
   2627 {
   2628     PyObject *new;
   2629     Py_buffer view;
   2630 
   2631     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
   2632         return NULL;
   2633 
   2634     new = PyBytes_FromStringAndSize(NULL, view.len);
   2635     if (!new)
   2636         goto fail;
   2637     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
   2638                 &view, view.len, 'C') < 0)
   2639         goto fail;
   2640     PyBuffer_Release(&view);
   2641     return new;
   2642 
   2643 fail:
   2644     Py_XDECREF(new);
   2645     PyBuffer_Release(&view);
   2646     return NULL;
   2647 }
   2648 
   2649 static PyObject*
   2650 _PyBytes_FromList(PyObject *x)
   2651 {
   2652     Py_ssize_t i, size = PyList_GET_SIZE(x);
   2653     Py_ssize_t value;
   2654     char *str;
   2655     PyObject *item;
   2656     _PyBytesWriter writer;
   2657 
   2658     _PyBytesWriter_Init(&writer);
   2659     str = _PyBytesWriter_Alloc(&writer, size);
   2660     if (str == NULL)
   2661         return NULL;
   2662     writer.overallocate = 1;
   2663     size = writer.allocated;
   2664 
   2665     for (i = 0; i < PyList_GET_SIZE(x); i++) {
   2666         item = PyList_GET_ITEM(x, i);
   2667         Py_INCREF(item);
   2668         value = PyNumber_AsSsize_t(item, NULL);
   2669         Py_DECREF(item);
   2670         if (value == -1 && PyErr_Occurred())
   2671             goto error;
   2672 
   2673         if (value < 0 || value >= 256) {
   2674             PyErr_SetString(PyExc_ValueError,
   2675                             "bytes must be in range(0, 256)");
   2676             goto error;
   2677         }
   2678 
   2679         if (i >= size) {
   2680             str = _PyBytesWriter_Resize(&writer, str, size+1);
   2681             if (str == NULL)
   2682                 return NULL;
   2683             size = writer.allocated;
   2684         }
   2685         *str++ = (char) value;
   2686     }
   2687     return _PyBytesWriter_Finish(&writer, str);
   2688 
   2689   error:
   2690     _PyBytesWriter_Dealloc(&writer);
   2691     return NULL;
   2692 }
   2693 
   2694 static PyObject*
   2695 _PyBytes_FromTuple(PyObject *x)
   2696 {
   2697     PyObject *bytes;
   2698     Py_ssize_t i, size = PyTuple_GET_SIZE(x);
   2699     Py_ssize_t value;
   2700     char *str;
   2701     PyObject *item;
   2702 
   2703     bytes = PyBytes_FromStringAndSize(NULL, size);
   2704     if (bytes == NULL)
   2705         return NULL;
   2706     str = ((PyBytesObject *)bytes)->ob_sval;
   2707 
   2708     for (i = 0; i < size; i++) {
   2709         item = PyTuple_GET_ITEM(x, i);
   2710         value = PyNumber_AsSsize_t(item, NULL);
   2711         if (value == -1 && PyErr_Occurred())
   2712             goto error;
   2713 
   2714         if (value < 0 || value >= 256) {
   2715             PyErr_SetString(PyExc_ValueError,
   2716                             "bytes must be in range(0, 256)");
   2717             goto error;
   2718         }
   2719         *str++ = (char) value;
   2720     }
   2721     return bytes;
   2722 
   2723   error:
   2724     Py_DECREF(bytes);
   2725     return NULL;
   2726 }
   2727 
   2728 static PyObject *
   2729 _PyBytes_FromIterator(PyObject *it, PyObject *x)
   2730 {
   2731     char *str;
   2732     Py_ssize_t i, size;
   2733     _PyBytesWriter writer;
   2734 
   2735     /* For iterator version, create a string object and resize as needed */
   2736     size = PyObject_LengthHint(x, 64);
   2737     if (size == -1 && PyErr_Occurred())
   2738         return NULL;
   2739 
   2740     _PyBytesWriter_Init(&writer);
   2741     str = _PyBytesWriter_Alloc(&writer, size);
   2742     if (str == NULL)
   2743         return NULL;
   2744     writer.overallocate = 1;
   2745     size = writer.allocated;
   2746 
   2747     /* Run the iterator to exhaustion */
   2748     for (i = 0; ; i++) {
   2749         PyObject *item;
   2750         Py_ssize_t value;
   2751 
   2752         /* Get the next item */
   2753         item = PyIter_Next(it);
   2754         if (item == NULL) {
   2755             if (PyErr_Occurred())
   2756                 goto error;
   2757             break;
   2758         }
   2759 
   2760         /* Interpret it as an int (__index__) */
   2761         value = PyNumber_AsSsize_t(item, NULL);
   2762         Py_DECREF(item);
   2763         if (value == -1 && PyErr_Occurred())
   2764             goto error;
   2765 
   2766         /* Range check */
   2767         if (value < 0 || value >= 256) {
   2768             PyErr_SetString(PyExc_ValueError,
   2769                             "bytes must be in range(0, 256)");
   2770             goto error;
   2771         }
   2772 
   2773         /* Append the byte */
   2774         if (i >= size) {
   2775             str = _PyBytesWriter_Resize(&writer, str, size+1);
   2776             if (str == NULL)
   2777                 return NULL;
   2778             size = writer.allocated;
   2779         }
   2780         *str++ = (char) value;
   2781     }
   2782 
   2783     return _PyBytesWriter_Finish(&writer, str);
   2784 
   2785   error:
   2786     _PyBytesWriter_Dealloc(&writer);
   2787     return NULL;
   2788 }
   2789 
   2790 PyObject *
   2791 PyBytes_FromObject(PyObject *x)
   2792 {
   2793     PyObject *it, *result;
   2794 
   2795     if (x == NULL) {
   2796         PyErr_BadInternalCall();
   2797         return NULL;
   2798     }
   2799 
   2800     if (PyBytes_CheckExact(x)) {
   2801         Py_INCREF(x);
   2802         return x;
   2803     }
   2804 
   2805     /* Use the modern buffer interface */
   2806     if (PyObject_CheckBuffer(x))
   2807         return _PyBytes_FromBuffer(x);
   2808 
   2809     if (PyList_CheckExact(x))
   2810         return _PyBytes_FromList(x);
   2811 
   2812     if (PyTuple_CheckExact(x))
   2813         return _PyBytes_FromTuple(x);
   2814 
   2815     if (!PyUnicode_Check(x)) {
   2816         it = PyObject_GetIter(x);
   2817         if (it != NULL) {
   2818             result = _PyBytes_FromIterator(it, x);
   2819             Py_DECREF(it);
   2820             return result;
   2821         }
   2822         if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
   2823             return NULL;
   2824         }
   2825     }
   2826 
   2827     PyErr_Format(PyExc_TypeError,
   2828                  "cannot convert '%.200s' object to bytes",
   2829                  x->ob_type->tp_name);
   2830     return NULL;
   2831 }
   2832 
   2833 static PyObject *
   2834 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   2835 {
   2836     PyObject *tmp, *pnew;
   2837     Py_ssize_t n;
   2838 
   2839     assert(PyType_IsSubtype(type, &PyBytes_Type));
   2840     tmp = bytes_new(&PyBytes_Type, args, kwds);
   2841     if (tmp == NULL)
   2842         return NULL;
   2843     assert(PyBytes_Check(tmp));
   2844     n = PyBytes_GET_SIZE(tmp);
   2845     pnew = type->tp_alloc(type, n);
   2846     if (pnew != NULL) {
   2847         memcpy(PyBytes_AS_STRING(pnew),
   2848                   PyBytes_AS_STRING(tmp), n+1);
   2849         ((PyBytesObject *)pnew)->ob_shash =
   2850             ((PyBytesObject *)tmp)->ob_shash;
   2851     }
   2852     Py_DECREF(tmp);
   2853     return pnew;
   2854 }
   2855 
/* Docstring of the bytes type; referenced from the tp_doc slot of
   PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - any object implementing the buffer API.\n\
  - an integer");
   2868 
/* Forward declaration: bytes_iter() is defined after the type object that
   references it in its tp_iter slot. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object of the built-in bytes type.  Instances are variable-sized:
   the basic size is PyBytesObject_SIZE and each item is one char. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
   2913 
   2914 void
   2915 PyBytes_Concat(PyObject **pv, PyObject *w)
   2916 {
   2917     assert(pv != NULL);
   2918     if (*pv == NULL)
   2919         return;
   2920     if (w == NULL) {
   2921         Py_CLEAR(*pv);
   2922         return;
   2923     }
   2924 
   2925     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
   2926         /* Only one reference, so we can resize in place */
   2927         Py_ssize_t oldsize;
   2928         Py_buffer wb;
   2929 
   2930         wb.len = -1;
   2931         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
   2932             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
   2933                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
   2934             Py_CLEAR(*pv);
   2935             return;
   2936         }
   2937 
   2938         oldsize = PyBytes_GET_SIZE(*pv);
   2939         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
   2940             PyErr_NoMemory();
   2941             goto error;
   2942         }
   2943         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
   2944             goto error;
   2945 
   2946         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
   2947         PyBuffer_Release(&wb);
   2948         return;
   2949 
   2950       error:
   2951         PyBuffer_Release(&wb);
   2952         Py_CLEAR(*pv);
   2953         return;
   2954     }
   2955 
   2956     else {
   2957         /* Multiple references, need to create new object */
   2958         PyObject *v;
   2959         v = bytes_concat(*pv, w);
   2960         Py_SETREF(*pv, v);
   2961     }
   2962 }
   2963 
/* Same as PyBytes_Concat(pv, w), but additionally releases the caller's
   reference to `w` afterwards.  `w` may be NULL (Py_XDECREF tolerates it). */
void
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
   2970 
   2971 
   2972 /* The following function breaks the notion that bytes are immutable:
   2973    it changes the size of a bytes object.  We get away with this only if there
   2974    is only one module referencing the object.  You can also think of it
   2975    as creating a new bytes object and destroying the old one, only
   2976    more efficiently.  In any case, don't use this if the bytes object may
   2977    already be known to some other part of the code...
   2978    Note that if there's not enough memory to resize the bytes object, the
   2979    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
   2980    memory" exception is set, and -1 is returned.  Else (on success) 0 is
   2981    returned, and the value in *pv may or may not be the same as on input.
   2982    As always, an extra byte is allocated for a trailing \0 byte (newsize
   2983    does *not* include that), and a trailing \0 byte is stored.
   2984 */
   2985 
int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    PyObject *v;
    PyBytesObject *sv;
    v = *pv;
    /* reject anything that is not a bytes object, and negative sizes */
    if (!PyBytes_Check(v) || newsize < 0) {
        goto error;
    }
    if (Py_SIZE(v) == newsize) {
        /* return early if newsize equals to v->ob_size */
        return 0;
    }
    if (Py_SIZE(v) == 0) {
        /* NOTE(review): this inner check cannot be true here, because the
           equal-size case (0 == newsize) already returned above. */
        if (newsize == 0) {
            return 0;
        }
        /* An empty source is replaced by a freshly allocated object instead
           of being reallocated; presumably because the empty bytes object
           may be the shared `nullstring` singleton -- TODO confirm against
           _PyBytes_FromSize(). */
        *pv = _PyBytes_FromSize(newsize, 0);
        Py_DECREF(v);
        return (*pv == NULL) ? -1 : 0;
    }
    /* in-place resizing is only allowed for an object with one reference */
    if (Py_REFCNT(v) != 1) {
        goto error;
    }
    if (newsize == 0) {
        /* shrinking to empty: swap in whatever _PyBytes_FromSize(0, 0)
           returns and release the old object */
        *pv = _PyBytes_FromSize(0, 0);
        Py_DECREF(v);
        return (*pv == NULL) ? -1 : 0;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* reallocation failed: `v` is still the valid old block, free it */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
error:
    /* misuse: clear *pv, drop the caller's reference and report it */
    *pv = 0;
    Py_DECREF(v);
    PyErr_BadInternalCall();
    return -1;
}
   3037 
   3038 void
   3039 PyBytes_Fini(void)
   3040 {
   3041     int i;
   3042     for (i = 0; i < UCHAR_MAX + 1; i++)
   3043         Py_CLEAR(characters[i]);
   3044     Py_CLEAR(nullstring);
   3045 }
   3046 
   3047 /*********************** Bytes Iterator ****************************/
   3048 
/* State of a bytes iterator (type PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* index of the next byte to return from it_seq */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
   3054 
/* tp_dealloc of bytes iterators: stop GC tracking, release the reference to
   the iterated bytes object (if any is still held) and free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);   /* it_seq is NULL once the iterator is exhausted */
    PyObject_GC_Del(it);
}
   3062 
/* tp_traverse of bytes iterators: the only object reference held is it_seq.
   Always returns 0 unless Py_VISIT returns early from within the macro. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
   3069 
   3070 static PyObject *
   3071 striter_next(striterobject *it)
   3072 {
   3073     PyBytesObject *seq;
   3074     PyObject *item;
   3075 
   3076     assert(it != NULL);
   3077     seq = it->it_seq;
   3078     if (seq == NULL)
   3079         return NULL;
   3080     assert(PyBytes_Check(seq));
   3081 
   3082     if (it->it_index < PyBytes_GET_SIZE(seq)) {
   3083         item = PyLong_FromLong(
   3084             (unsigned char)seq->ob_sval[it->it_index]);
   3085         if (item != NULL)
   3086             ++it->it_index;
   3087         return item;
   3088     }
   3089 
   3090     it->it_seq = NULL;
   3091     Py_DECREF(seq);
   3092     return NULL;
   3093 }
   3094 
   3095 static PyObject *
   3096 striter_len(striterobject *it)
   3097 {
   3098     Py_ssize_t len = 0;
   3099     if (it->it_seq)
   3100         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
   3101     return PyLong_FromSsize_t(len);
   3102 }
   3103 
   3104 PyDoc_STRVAR(length_hint_doc,
   3105              "Private method returning an estimate of len(list(it)).");
   3106 
   3107 static PyObject *
   3108 striter_reduce(striterobject *it)
   3109 {
   3110     if (it->it_seq != NULL) {
   3111         return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
   3112                              it->it_seq, it->it_index);
   3113     } else {
   3114         return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
   3115     }
   3116 }
   3117 
   3118 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
   3119 
   3120 static PyObject *
   3121 striter_setstate(striterobject *it, PyObject *state)
   3122 {
   3123     Py_ssize_t index = PyLong_AsSsize_t(state);
   3124     if (index == -1 && PyErr_Occurred())
   3125         return NULL;
   3126     if (it->it_seq != NULL) {
   3127         if (index < 0)
   3128             index = 0;
   3129         else if (index > PyBytes_GET_SIZE(it->it_seq))
   3130             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
   3131         it->it_index = index;
   3132     }
   3133     Py_RETURN_NONE;
   3134 }
   3135 
   3136 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
   3137 
/* Method table of bytes iterators (tp_methods of PyBytesIter_Type):
   length hint plus the pickling support pair __reduce__/__setstate__. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
   3147 
/* Type object of the iterator returned by bytes_iter().  Instances are
   fixed-size, GC-enabled (they hold a reference to the iterated bytes
   object) and are their own iterator (tp_iter is PyObject_SelfIter). */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
   3180 
   3181 static PyObject *
   3182 bytes_iter(PyObject *seq)
   3183 {
   3184     striterobject *it;
   3185 
   3186     if (!PyBytes_Check(seq)) {
   3187         PyErr_BadInternalCall();
   3188         return NULL;
   3189     }
   3190     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
   3191     if (it == NULL)
   3192         return NULL;
   3193     it->it_index = 0;
   3194     Py_INCREF(seq);
   3195     it->it_seq = (PyBytesObject *)seq;
   3196     _PyObject_GC_TRACK(it);
   3197     return (PyObject *)it;
   3198 }
   3199 
   3200 
   3201 /* _PyBytesWriter API */
   3202 
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Resize() requests size + size / OVERALLOCATE_FACTOR bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
   3210 
/* Initialize a writer.  Every field located before small_buffer in the
   structure is zeroed; small_buffer itself is left uninitialized, except in
   debug builds where it is filled with the marker byte 0xCB. */
void
_PyBytesWriter_Init(_PyBytesWriter *writer)
{
    /* Set all attributes before small_buffer to 0 */
    memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
#ifdef Py_DEBUG
    memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
#endif
}
   3220 
/* Release the heap buffer object owned by the writer, if any, and reset
   writer->buffer to NULL.  Safe to call when no buffer object was allocated
   (Py_CLEAR tolerates NULL). */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
    Py_CLEAR(writer->buffer);
}
   3226 
   3227 Py_LOCAL_INLINE(char*)
   3228 _PyBytesWriter_AsString(_PyBytesWriter *writer)
   3229 {
   3230     if (writer->use_small_buffer) {
   3231         assert(writer->buffer == NULL);
   3232         return writer->small_buffer;
   3233     }
   3234     else if (writer->use_bytearray) {
   3235         assert(writer->buffer != NULL);
   3236         return PyByteArray_AS_STRING(writer->buffer);
   3237     }
   3238     else {
   3239         assert(writer->buffer != NULL);
   3240         return PyBytes_AS_STRING(writer->buffer);
   3241     }
   3242 }
   3243 
   3244 Py_LOCAL_INLINE(Py_ssize_t)
   3245 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
   3246 {
   3247     char *start = _PyBytesWriter_AsString(writer);
   3248     assert(str != NULL);
   3249     assert(str >= start);
   3250     assert(str - start <= writer->allocated);
   3251     return str - start;
   3252 }
   3253 
   3254 Py_LOCAL_INLINE(void)
   3255 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
   3256 {
   3257 #ifdef Py_DEBUG
   3258     char *start, *end;
   3259 
   3260     if (writer->use_small_buffer) {
   3261         assert(writer->buffer == NULL);
   3262     }
   3263     else {
   3264         assert(writer->buffer != NULL);
   3265         if (writer->use_bytearray)
   3266             assert(PyByteArray_CheckExact(writer->buffer));
   3267         else
   3268             assert(PyBytes_CheckExact(writer->buffer));
   3269         assert(Py_REFCNT(writer->buffer) == 1);
   3270     }
   3271 
   3272     if (writer->use_bytearray) {
   3273         /* bytearray has its own overallocation algorithm,
   3274            writer overallocation must be disabled */
   3275         assert(!writer->overallocate);
   3276     }
   3277 
   3278     assert(0 <= writer->allocated);
   3279     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
   3280     /* the last byte must always be null */
   3281     start = _PyBytesWriter_AsString(writer);
   3282     assert(start[writer->allocated] == 0);
   3283 
   3284     end = start + writer->allocated;
   3285     assert(str != NULL);
   3286     assert(start <= str && str <= end);
   3287 #endif
   3288 }
   3289 
/* Grow the writer's storage to at least `size` bytes (more when
   overallocation is enabled) and return the write position inside the new
   storage that corresponds to `str`.

   `size` must be larger than the current allocation.  When the writer is
   still using its embedded small buffer, a bytes/bytearray object is
   allocated and the bytes written so far are copied into it.  On failure
   the writer is deallocated and NULL is returned. */
void*
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
{
    Py_ssize_t allocated, pos;

    _PyBytesWriter_CheckConsistency(writer, str);
    assert(writer->allocated < size);

    allocated = size;
    /* the extra space is skipped when adding it would overflow Py_ssize_t */
    if (writer->overallocate
        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
        /* overallocate to limit the number of realloc() */
        allocated += allocated / OVERALLOCATE_FACTOR;
    }

    /* remember the write offset: the storage may move below */
    pos = _PyBytesWriter_GetSize(writer, str);
    if (!writer->use_small_buffer) {
        if (writer->use_bytearray) {
            if (PyByteArray_Resize(writer->buffer, allocated))
                goto error;
            /* writer->allocated can be smaller than writer->buffer->ob_alloc,
               but we cannot use ob_alloc because bytes may need to be moved
               to use the whole buffer. bytearray uses an internal optimization
               to avoid moving or copying bytes when bytes are removed at the
               beginning (ex: del bytearray[:1]). */
        }
        else {
            if (_PyBytes_Resize(&writer->buffer, allocated))
                goto error;
        }
    }
    else {
        /* convert from stack buffer to bytes object buffer */
        assert(writer->buffer == NULL);

        if (writer->use_bytearray)
            writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
        else
            writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
        if (writer->buffer == NULL)
            goto error;

        /* carry over what was already written into the small buffer */
        if (pos != 0) {
            char *dest;
            if (writer->use_bytearray)
                dest = PyByteArray_AS_STRING(writer->buffer);
            else
                dest = PyBytes_AS_STRING(writer->buffer);
            memcpy(dest,
                      writer->small_buffer,
                      pos);
        }

        writer->use_small_buffer = 0;
#ifdef Py_DEBUG
        /* debug builds overwrite the abandoned small buffer with 0xDB */
        memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
#endif
    }
    writer->allocated = allocated;

    /* recompute the write position relative to the (possibly new) storage */
    str = _PyBytesWriter_AsString(writer) + pos;
    _PyBytesWriter_CheckConsistency(writer, str);
    return str;

error:
    _PyBytesWriter_Dealloc(writer);
    return NULL;
}
   3358 
   3359 void*
   3360 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
   3361 {
   3362     Py_ssize_t new_min_size;
   3363 
   3364     _PyBytesWriter_CheckConsistency(writer, str);
   3365     assert(size >= 0);
   3366 
   3367     if (size == 0) {
   3368         /* nothing to do */
   3369         return str;
   3370     }
   3371 
   3372     if (writer->min_size > PY_SSIZE_T_MAX - size) {
   3373         PyErr_NoMemory();
   3374         _PyBytesWriter_Dealloc(writer);
   3375         return NULL;
   3376     }
   3377     new_min_size = writer->min_size + size;
   3378 
   3379     if (new_min_size > writer->allocated)
   3380         str = _PyBytesWriter_Resize(writer, str, new_min_size);
   3381 
   3382     writer->min_size = new_min_size;
   3383     return str;
   3384 }
   3385 
/* Allocate the buffer to write size bytes.
   Return the pointer to the beginning of buffer data.
   Raise an exception and return NULL on error.

   The writer always starts on its embedded small buffer; the request is
   then forwarded to _PyBytesWriter_Prepare(), which switches to a heap
   object if `size` does not fit. */
void*
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
{
    /* ensure that _PyBytesWriter_Alloc() is only called once */
    assert(writer->min_size == 0 && writer->buffer == NULL);
    assert(size >= 0);

    writer->use_small_buffer = 1;
#ifdef Py_DEBUG
    /* reserve the last byte of the small buffer for the null terminator */
    writer->allocated = sizeof(writer->small_buffer) - 1;
    /* In debug mode, don't use the full small buffer because it is less
       efficient than bytes and bytearray objects to detect buffer underflow
       and buffer overflow. Use 10 bytes of the small buffer to test also
       code using the smaller buffer in debug mode.

       Don't modify the _PyBytesWriter structure (use a shorter small buffer)
       in debug mode to also be able to detect stack overflow when running
       tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
       if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
       stack overflow. */
    writer->allocated = Py_MIN(writer->allocated, 10);
    /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
       to detect buffer overflow */
    writer->small_buffer[writer->allocated] = 0;
#else
    /* release builds use the whole small buffer */
    writer->allocated = sizeof(writer->small_buffer);
#endif
    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
}
   3418 
   3419 PyObject *
   3420 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
   3421 {
   3422     Py_ssize_t size;
   3423     PyObject *result;
   3424 
   3425     _PyBytesWriter_CheckConsistency(writer, str);
   3426 
   3427     size = _PyBytesWriter_GetSize(writer, str);
   3428     if (size == 0 && !writer->use_bytearray) {
   3429         Py_CLEAR(writer->buffer);
   3430         /* Get the empty byte string singleton */
   3431         result = PyBytes_FromStringAndSize(NULL, 0);
   3432     }
   3433     else if (writer->use_small_buffer) {
   3434         if (writer->use_bytearray) {
   3435             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
   3436         }
   3437         else {
   3438             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
   3439         }
   3440     }
   3441     else {
   3442         result = writer->buffer;
   3443         writer->buffer = NULL;
   3444 
   3445         if (size != writer->allocated) {
   3446             if (writer->use_bytearray) {
   3447                 if (PyByteArray_Resize(result, size)) {
   3448                     Py_DECREF(result);
   3449                     return NULL;
   3450                 }
   3451             }
   3452             else {
   3453                 if (_PyBytes_Resize(&result, size)) {
   3454                     assert(result == NULL);
   3455                     return NULL;
   3456                 }
   3457             }
   3458         }
   3459     }
   3460     return result;
   3461 }
   3462 
   3463 void*
   3464 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
   3465                           const void *bytes, Py_ssize_t size)
   3466 {
   3467     char *str = (char *)ptr;
   3468 
   3469     str = _PyBytesWriter_Prepare(writer, str, size);
   3470     if (str == NULL)
   3471         return NULL;
   3472 
   3473     memcpy(str, bytes, size);
   3474     str += size;
   3475 
   3476     return str;
   3477 }
   3478