Home | History | Annotate | Download | only in Objects
      1 /* bytes object implementation */
      2 
      3 #define PY_SSIZE_T_CLEAN
      4 
      5 #include "Python.h"
      6 
      7 #include "bytes_methods.h"
      8 #include "pystrhex.h"
      9 #include <stddef.h>
     10 
     11 /*[clinic input]
     12 class bytes "PyBytesObject *" "&PyBytes_Type"
     13 [clinic start generated code]*/
     14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
     15 
     16 #include "clinic/bytesobject.c.h"
     17 
#ifdef COUNT_ALLOCS
/* Allocation statistics (only with COUNT_ALLOCS): bumped each time the
   cached empty bytes object (null_strings) or a cached one-byte bytes
   object (one_strings) is handed out instead of allocating a new one. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte's value
   (`*str & UCHAR_MAX').  Entries start out NULL and are filled in the first
   time a one-byte object is created from data; the cache holds its own
   reference to each stored object. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty bytes object is
   created.  The cache holds its own reference to it. */
static PyBytesObject *nullstring;
     24 
     25 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
     26    for a string of length n should request PyBytesObject_SIZE + n bytes.
     27 
     28    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
     29    3 bytes per string allocation on a typical system.
     30 */
     31 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
     32 
     33 /* Forward declaration */
     34 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
     35                                                    char *str);
     36 
     37 /*
     38    For PyBytes_FromString(), the parameter `str' points to a null-terminated
     39    string containing exactly `size' bytes.
     40 
     41    For PyBytes_FromStringAndSize(), the parameter `str' is
     42    either NULL or else points to a string containing at least `size' bytes.
     43    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
     44    not have to be null-terminated.  (Therefore it is safe to construct a
     45    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
     46    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
     47    bytes (setting the last byte to the null terminating character) and you can
     48    fill in the data yourself.  If `str' is non-NULL then the resulting
     49    PyBytes object must be treated as immutable and you must not fill in nor
     50    alter the data yourself, since the strings may be shared.
     51 
     52    The PyObject member `op->ob_size', which denotes the number of "extra
     53    items" in a variable-size object, will contain the number of bytes
     54    allocated for string data, not counting the null terminating character.
     55    It is therefore equal to the `size' parameter (for
     56    PyBytes_FromStringAndSize()) or the length of the string in the `str'
     57    parameter (for PyBytes_FromString()).
     58 */
     59 static PyObject *
     60 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
     61 {
     62     PyBytesObject *op;
     63     assert(size >= 0);
     64 
     65     if (size == 0 && (op = nullstring) != NULL) {
     66 #ifdef COUNT_ALLOCS
     67         null_strings++;
     68 #endif
     69         Py_INCREF(op);
     70         return (PyObject *)op;
     71     }
     72 
     73     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
     74         PyErr_SetString(PyExc_OverflowError,
     75                         "byte string is too large");
     76         return NULL;
     77     }
     78 
     79     /* Inline PyObject_NewVar */
     80     if (use_calloc)
     81         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
     82     else
     83         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
     84     if (op == NULL)
     85         return PyErr_NoMemory();
     86     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
     87     op->ob_shash = -1;
     88     if (!use_calloc)
     89         op->ob_sval[size] = '\0';
     90     /* empty byte string singleton */
     91     if (size == 0) {
     92         nullstring = op;
     93         Py_INCREF(op);
     94     }
     95     return (PyObject *) op;
     96 }
     97 
     98 PyObject *
     99 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
    100 {
    101     PyBytesObject *op;
    102     if (size < 0) {
    103         PyErr_SetString(PyExc_SystemError,
    104             "Negative size passed to PyBytes_FromStringAndSize");
    105         return NULL;
    106     }
    107     if (size == 1 && str != NULL &&
    108         (op = characters[*str & UCHAR_MAX]) != NULL)
    109     {
    110 #ifdef COUNT_ALLOCS
    111         one_strings++;
    112 #endif
    113         Py_INCREF(op);
    114         return (PyObject *)op;
    115     }
    116 
    117     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
    118     if (op == NULL)
    119         return NULL;
    120     if (str == NULL)
    121         return (PyObject *) op;
    122 
    123     memcpy(op->ob_sval, str, size);
    124     /* share short strings */
    125     if (size == 1) {
    126         characters[*str & UCHAR_MAX] = op;
    127         Py_INCREF(op);
    128     }
    129     return (PyObject *) op;
    130 }
    131 
    132 PyObject *
    133 PyBytes_FromString(const char *str)
    134 {
    135     size_t size;
    136     PyBytesObject *op;
    137 
    138     assert(str != NULL);
    139     size = strlen(str);
    140     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
    141         PyErr_SetString(PyExc_OverflowError,
    142             "byte string is too long");
    143         return NULL;
    144     }
    145     if (size == 0 && (op = nullstring) != NULL) {
    146 #ifdef COUNT_ALLOCS
    147         null_strings++;
    148 #endif
    149         Py_INCREF(op);
    150         return (PyObject *)op;
    151     }
    152     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
    153 #ifdef COUNT_ALLOCS
    154         one_strings++;
    155 #endif
    156         Py_INCREF(op);
    157         return (PyObject *)op;
    158     }
    159 
    160     /* Inline PyObject_NewVar */
    161     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
    162     if (op == NULL)
    163         return PyErr_NoMemory();
    164     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
    165     op->ob_shash = -1;
    166     memcpy(op->ob_sval, str, size+1);
    167     /* share short strings */
    168     if (size == 0) {
    169         nullstring = op;
    170         Py_INCREF(op);
    171     } else if (size == 1) {
    172         characters[*str & UCHAR_MAX] = op;
    173         Py_INCREF(op);
    174     }
    175     return (PyObject *) op;
    176 }
    177 
    178 PyObject *
    179 PyBytes_FromFormatV(const char *format, va_list vargs)
    180 {
    181     char *s;
    182     const char *f;
    183     const char *p;
    184     Py_ssize_t prec;
    185     int longflag;
    186     int size_tflag;
    187     /* Longest 64-bit formatted numbers:
    188        - "18446744073709551615\0" (21 bytes)
    189        - "-9223372036854775808\0" (21 bytes)
    190        Decimal takes the most space (it isn't enough for octal.)
    191 
    192        Longest 64-bit pointer representation:
    193        "0xffffffffffffffff\0" (19 bytes). */
    194     char buffer[21];
    195     _PyBytesWriter writer;
    196 
    197     _PyBytesWriter_Init(&writer);
    198 
    199     s = _PyBytesWriter_Alloc(&writer, strlen(format));
    200     if (s == NULL)
    201         return NULL;
    202     writer.overallocate = 1;
    203 
    204 #define WRITE_BYTES(str) \
    205     do { \
    206         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
    207         if (s == NULL) \
    208             goto error; \
    209     } while (0)
    210 
    211     for (f = format; *f; f++) {
    212         if (*f != '%') {
    213             *s++ = *f;
    214             continue;
    215         }
    216 
    217         p = f++;
    218 
    219         /* ignore the width (ex: 10 in "%10s") */
    220         while (Py_ISDIGIT(*f))
    221             f++;
    222 
    223         /* parse the precision (ex: 10 in "%.10s") */
    224         prec = 0;
    225         if (*f == '.') {
    226             f++;
    227             for (; Py_ISDIGIT(*f); f++) {
    228                 prec = (prec * 10) + (*f - '0');
    229             }
    230         }
    231 
    232         while (*f && *f != '%' && !Py_ISALPHA(*f))
    233             f++;
    234 
    235         /* handle the long flag ('l'), but only for %ld and %lu.
    236            others can be added when necessary. */
    237         longflag = 0;
    238         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
    239             longflag = 1;
    240             ++f;
    241         }
    242 
    243         /* handle the size_t flag ('z'). */
    244         size_tflag = 0;
    245         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
    246             size_tflag = 1;
    247             ++f;
    248         }
    249 
    250         /* subtract bytes preallocated for the format string
    251            (ex: 2 for "%s") */
    252         writer.min_size -= (f - p + 1);
    253 
    254         switch (*f) {
    255         case 'c':
    256         {
    257             int c = va_arg(vargs, int);
    258             if (c < 0 || c > 255) {
    259                 PyErr_SetString(PyExc_OverflowError,
    260                                 "PyBytes_FromFormatV(): %c format "
    261                                 "expects an integer in range [0; 255]");
    262                 goto error;
    263             }
    264             writer.min_size++;
    265             *s++ = (unsigned char)c;
    266             break;
    267         }
    268 
    269         case 'd':
    270             if (longflag)
    271                 sprintf(buffer, "%ld", va_arg(vargs, long));
    272             else if (size_tflag)
    273                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
    274                     va_arg(vargs, Py_ssize_t));
    275             else
    276                 sprintf(buffer, "%d", va_arg(vargs, int));
    277             assert(strlen(buffer) < sizeof(buffer));
    278             WRITE_BYTES(buffer);
    279             break;
    280 
    281         case 'u':
    282             if (longflag)
    283                 sprintf(buffer, "%lu",
    284                     va_arg(vargs, unsigned long));
    285             else if (size_tflag)
    286                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
    287                     va_arg(vargs, size_t));
    288             else
    289                 sprintf(buffer, "%u",
    290                     va_arg(vargs, unsigned int));
    291             assert(strlen(buffer) < sizeof(buffer));
    292             WRITE_BYTES(buffer);
    293             break;
    294 
    295         case 'i':
    296             sprintf(buffer, "%i", va_arg(vargs, int));
    297             assert(strlen(buffer) < sizeof(buffer));
    298             WRITE_BYTES(buffer);
    299             break;
    300 
    301         case 'x':
    302             sprintf(buffer, "%x", va_arg(vargs, int));
    303             assert(strlen(buffer) < sizeof(buffer));
    304             WRITE_BYTES(buffer);
    305             break;
    306 
    307         case 's':
    308         {
    309             Py_ssize_t i;
    310 
    311             p = va_arg(vargs, const char*);
    312             i = strlen(p);
    313             if (prec > 0 && i > prec)
    314                 i = prec;
    315             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
    316             if (s == NULL)
    317                 goto error;
    318             break;
    319         }
    320 
    321         case 'p':
    322             sprintf(buffer, "%p", va_arg(vargs, void*));
    323             assert(strlen(buffer) < sizeof(buffer));
    324             /* %p is ill-defined:  ensure leading 0x. */
    325             if (buffer[1] == 'X')
    326                 buffer[1] = 'x';
    327             else if (buffer[1] != 'x') {
    328                 memmove(buffer+2, buffer, strlen(buffer)+1);
    329                 buffer[0] = '0';
    330                 buffer[1] = 'x';
    331             }
    332             WRITE_BYTES(buffer);
    333             break;
    334 
    335         case '%':
    336             writer.min_size++;
    337             *s++ = '%';
    338             break;
    339 
    340         default:
    341             if (*f == 0) {
    342                 /* fix min_size if we reached the end of the format string */
    343                 writer.min_size++;
    344             }
    345 
    346             /* invalid format string: copy unformatted string and exit */
    347             WRITE_BYTES(p);
    348             return _PyBytesWriter_Finish(&writer, s);
    349         }
    350     }
    351 
    352 #undef WRITE_BYTES
    353 
    354     return _PyBytesWriter_Finish(&writer, s);
    355 
    356  error:
    357     _PyBytesWriter_Dealloc(&writer);
    358     return NULL;
    359 }
    360 
    361 PyObject *
    362 PyBytes_FromFormat(const char *format, ...)
    363 {
    364     PyObject* ret;
    365     va_list vargs;
    366 
    367 #ifdef HAVE_STDARG_PROTOTYPES
    368     va_start(vargs, format);
    369 #else
    370     va_start(vargs);
    371 #endif
    372     ret = PyBytes_FromFormatV(format, vargs);
    373     va_end(vargs);
    374     return ret;
    375 }
    376 
    377 /* Helpers for formatstring */
    378 
    379 Py_LOCAL_INLINE(PyObject *)
    380 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
    381 {
    382     Py_ssize_t argidx = *p_argidx;
    383     if (argidx < arglen) {
    384         (*p_argidx)++;
    385         if (arglen < 0)
    386             return args;
    387         else
    388             return PyTuple_GetItem(args, argidx);
    389     }
    390     PyErr_SetString(PyExc_TypeError,
    391                     "not enough arguments for format string");
    392     return NULL;
    393 }
    394 
/* Format codes
 *
 * Flag bits OR-ed together while the flag characters of a '%' conversion
 * are parsed; each bit records that the character shown was seen.
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
    407 
    408 /* Returns a new reference to a PyBytes object, or NULL on failure. */
    409 
    410 static char*
    411 formatfloat(PyObject *v, int flags, int prec, int type,
    412             PyObject **p_result, _PyBytesWriter *writer, char *str)
    413 {
    414     char *p;
    415     PyObject *result;
    416     double x;
    417     size_t len;
    418 
    419     x = PyFloat_AsDouble(v);
    420     if (x == -1.0 && PyErr_Occurred()) {
    421         PyErr_Format(PyExc_TypeError, "float argument required, "
    422                      "not %.200s", Py_TYPE(v)->tp_name);
    423         return NULL;
    424     }
    425 
    426     if (prec < 0)
    427         prec = 6;
    428 
    429     p = PyOS_double_to_string(x, type, prec,
    430                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
    431 
    432     if (p == NULL)
    433         return NULL;
    434 
    435     len = strlen(p);
    436     if (writer != NULL) {
    437         str = _PyBytesWriter_Prepare(writer, str, len);
    438         if (str == NULL)
    439             return NULL;
    440         memcpy(str, p, len);
    441         PyMem_Free(p);
    442         str += len;
    443         return str;
    444     }
    445 
    446     result = PyBytes_FromStringAndSize(p, len);
    447     PyMem_Free(p);
    448     *p_result = result;
    449     return str;
    450 }
    451 
    452 static PyObject *
    453 formatlong(PyObject *v, int flags, int prec, int type)
    454 {
    455     PyObject *result, *iobj;
    456     if (type == 'i')
    457         type = 'd';
    458     if (PyLong_Check(v))
    459         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
    460     if (PyNumber_Check(v)) {
    461         /* make sure number is a type of integer for o, x, and X */
    462         if (type == 'o' || type == 'x' || type == 'X')
    463             iobj = PyNumber_Index(v);
    464         else
    465             iobj = PyNumber_Long(v);
    466         if (iobj == NULL) {
    467             if (!PyErr_ExceptionMatches(PyExc_TypeError))
    468                 return NULL;
    469         }
    470         else if (!PyLong_Check(iobj))
    471             Py_CLEAR(iobj);
    472         if (iobj != NULL) {
    473             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
    474             Py_DECREF(iobj);
    475             return result;
    476         }
    477     }
    478     PyErr_Format(PyExc_TypeError,
    479         "%%%c format: %s is required, not %.200s", type,
    480         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
    481                                                     : "a number",
    482         Py_TYPE(v)->tp_name);
    483     return NULL;
    484 }
    485 
    486 static int
    487 byte_converter(PyObject *arg, char *p)
    488 {
    489     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
    490         *p = PyBytes_AS_STRING(arg)[0];
    491         return 1;
    492     }
    493     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
    494         *p = PyByteArray_AS_STRING(arg)[0];
    495         return 1;
    496     }
    497     else {
    498         PyObject *iobj;
    499         long ival;
    500         int overflow;
    501         /* make sure number is a type of integer */
    502         if (PyLong_Check(arg)) {
    503             ival = PyLong_AsLongAndOverflow(arg, &overflow);
    504         }
    505         else {
    506             iobj = PyNumber_Index(arg);
    507             if (iobj == NULL) {
    508                 if (!PyErr_ExceptionMatches(PyExc_TypeError))
    509                     return 0;
    510                 goto onError;
    511             }
    512             ival = PyLong_AsLongAndOverflow(iobj, &overflow);
    513             Py_DECREF(iobj);
    514         }
    515         if (!overflow && ival == -1 && PyErr_Occurred())
    516             goto onError;
    517         if (overflow || !(0 <= ival && ival <= 255)) {
    518             PyErr_SetString(PyExc_OverflowError,
    519                             "%c arg not in range(256)");
    520             return 0;
    521         }
    522         *p = (char)ival;
    523         return 1;
    524     }
    525   onError:
    526     PyErr_SetString(PyExc_TypeError,
    527         "%c requires an integer in range(256) or a single byte");
    528     return 0;
    529 }
    530 
    531 static PyObject *
    532 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
    533 {
    534     PyObject *func, *result;
    535     _Py_IDENTIFIER(__bytes__);
    536     /* is it a bytes object? */
    537     if (PyBytes_Check(v)) {
    538         *pbuf = PyBytes_AS_STRING(v);
    539         *plen = PyBytes_GET_SIZE(v);
    540         Py_INCREF(v);
    541         return v;
    542     }
    543     if (PyByteArray_Check(v)) {
    544         *pbuf = PyByteArray_AS_STRING(v);
    545         *plen = PyByteArray_GET_SIZE(v);
    546         Py_INCREF(v);
    547         return v;
    548     }
    549     /* does it support __bytes__? */
    550     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
    551     if (func != NULL) {
    552         result = PyObject_CallFunctionObjArgs(func, NULL);
    553         Py_DECREF(func);
    554         if (result == NULL)
    555             return NULL;
    556         if (!PyBytes_Check(result)) {
    557             PyErr_Format(PyExc_TypeError,
    558                          "__bytes__ returned non-bytes (type %.200s)",
    559                          Py_TYPE(result)->tp_name);
    560             Py_DECREF(result);
    561             return NULL;
    562         }
    563         *pbuf = PyBytes_AS_STRING(result);
    564         *plen = PyBytes_GET_SIZE(result);
    565         return result;
    566     }
    567     PyErr_Format(PyExc_TypeError,
    568                  "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
    569                  Py_TYPE(v)->tp_name);
    570     return NULL;
    571 }
    572 
    573 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
    574 
    575 PyObject *
    576 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
    577                   PyObject *args, int use_bytearray)
    578 {
    579     const char *fmt;
    580     char *res;
    581     Py_ssize_t arglen, argidx;
    582     Py_ssize_t fmtcnt;
    583     int args_owned = 0;
    584     PyObject *dict = NULL;
    585     _PyBytesWriter writer;
    586 
    587     if (args == NULL) {
    588         PyErr_BadInternalCall();
    589         return NULL;
    590     }
    591     fmt = format;
    592     fmtcnt = format_len;
    593 
    594     _PyBytesWriter_Init(&writer);
    595     writer.use_bytearray = use_bytearray;
    596 
    597     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
    598     if (res == NULL)
    599         return NULL;
    600     if (!use_bytearray)
    601         writer.overallocate = 1;
    602 
    603     if (PyTuple_Check(args)) {
    604         arglen = PyTuple_GET_SIZE(args);
    605         argidx = 0;
    606     }
    607     else {
    608         arglen = -1;
    609         argidx = -2;
    610     }
    611     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
    612         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
    613         !PyByteArray_Check(args)) {
    614             dict = args;
    615     }
    616 
    617     while (--fmtcnt >= 0) {
    618         if (*fmt != '%') {
    619             Py_ssize_t len;
    620             char *pos;
    621 
    622             pos = strchr(fmt + 1, '%');
    623             if (pos != NULL)
    624                 len = pos - fmt;
    625             else
    626                 len = format_len - (fmt - format);
    627             assert(len != 0);
    628 
    629             memcpy(res, fmt, len);
    630             res += len;
    631             fmt += len;
    632             fmtcnt -= (len - 1);
    633         }
    634         else {
    635             /* Got a format specifier */
    636             int flags = 0;
    637             Py_ssize_t width = -1;
    638             int prec = -1;
    639             int c = '\0';
    640             int fill;
    641             PyObject *v = NULL;
    642             PyObject *temp = NULL;
    643             const char *pbuf = NULL;
    644             int sign;
    645             Py_ssize_t len = 0;
    646             char onechar; /* For byte_converter() */
    647             Py_ssize_t alloc;
    648 #ifdef Py_DEBUG
    649             char *before;
    650 #endif
    651 
    652             fmt++;
    653             if (*fmt == '(') {
    654                 const char *keystart;
    655                 Py_ssize_t keylen;
    656                 PyObject *key;
    657                 int pcount = 1;
    658 
    659                 if (dict == NULL) {
    660                     PyErr_SetString(PyExc_TypeError,
    661                              "format requires a mapping");
    662                     goto error;
    663                 }
    664                 ++fmt;
    665                 --fmtcnt;
    666                 keystart = fmt;
    667                 /* Skip over balanced parentheses */
    668                 while (pcount > 0 && --fmtcnt >= 0) {
    669                     if (*fmt == ')')
    670                         --pcount;
    671                     else if (*fmt == '(')
    672                         ++pcount;
    673                     fmt++;
    674                 }
    675                 keylen = fmt - keystart - 1;
    676                 if (fmtcnt < 0 || pcount > 0) {
    677                     PyErr_SetString(PyExc_ValueError,
    678                                "incomplete format key");
    679                     goto error;
    680                 }
    681                 key = PyBytes_FromStringAndSize(keystart,
    682                                                  keylen);
    683                 if (key == NULL)
    684                     goto error;
    685                 if (args_owned) {
    686                     Py_DECREF(args);
    687                     args_owned = 0;
    688                 }
    689                 args = PyObject_GetItem(dict, key);
    690                 Py_DECREF(key);
    691                 if (args == NULL) {
    692                     goto error;
    693                 }
    694                 args_owned = 1;
    695                 arglen = -1;
    696                 argidx = -2;
    697             }
    698 
    699             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
    700             while (--fmtcnt >= 0) {
    701                 switch (c = *fmt++) {
    702                 case '-': flags |= F_LJUST; continue;
    703                 case '+': flags |= F_SIGN; continue;
    704                 case ' ': flags |= F_BLANK; continue;
    705                 case '#': flags |= F_ALT; continue;
    706                 case '0': flags |= F_ZERO; continue;
    707                 }
    708                 break;
    709             }
    710 
    711             /* Parse width. Example: "%10s" => width=10 */
    712             if (c == '*') {
    713                 v = getnextarg(args, arglen, &argidx);
    714                 if (v == NULL)
    715                     goto error;
    716                 if (!PyLong_Check(v)) {
    717                     PyErr_SetString(PyExc_TypeError,
    718                                     "* wants int");
    719                     goto error;
    720                 }
    721                 width = PyLong_AsSsize_t(v);
    722                 if (width == -1 && PyErr_Occurred())
    723                     goto error;
    724                 if (width < 0) {
    725                     flags |= F_LJUST;
    726                     width = -width;
    727                 }
    728                 if (--fmtcnt >= 0)
    729                     c = *fmt++;
    730             }
    731             else if (c >= 0 && isdigit(c)) {
    732                 width = c - '0';
    733                 while (--fmtcnt >= 0) {
    734                     c = Py_CHARMASK(*fmt++);
    735                     if (!isdigit(c))
    736                         break;
    737                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
    738                         PyErr_SetString(
    739                             PyExc_ValueError,
    740                             "width too big");
    741                         goto error;
    742                     }
    743                     width = width*10 + (c - '0');
    744                 }
    745             }
    746 
    747             /* Parse precision. Example: "%.3f" => prec=3 */
    748             if (c == '.') {
    749                 prec = 0;
    750                 if (--fmtcnt >= 0)
    751                     c = *fmt++;
    752                 if (c == '*') {
    753                     v = getnextarg(args, arglen, &argidx);
    754                     if (v == NULL)
    755                         goto error;
    756                     if (!PyLong_Check(v)) {
    757                         PyErr_SetString(
    758                             PyExc_TypeError,
    759                             "* wants int");
    760                         goto error;
    761                     }
    762                     prec = _PyLong_AsInt(v);
    763                     if (prec == -1 && PyErr_Occurred())
    764                         goto error;
    765                     if (prec < 0)
    766                         prec = 0;
    767                     if (--fmtcnt >= 0)
    768                         c = *fmt++;
    769                 }
    770                 else if (c >= 0 && isdigit(c)) {
    771                     prec = c - '0';
    772                     while (--fmtcnt >= 0) {
    773                         c = Py_CHARMASK(*fmt++);
    774                         if (!isdigit(c))
    775                             break;
    776                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
    777                             PyErr_SetString(
    778                                 PyExc_ValueError,
    779                                 "prec too big");
    780                             goto error;
    781                         }
    782                         prec = prec*10 + (c - '0');
    783                     }
    784                 }
    785             } /* prec */
    786             if (fmtcnt >= 0) {
    787                 if (c == 'h' || c == 'l' || c == 'L') {
    788                     if (--fmtcnt >= 0)
    789                         c = *fmt++;
    790                 }
    791             }
    792             if (fmtcnt < 0) {
    793                 PyErr_SetString(PyExc_ValueError,
    794                                 "incomplete format");
    795                 goto error;
    796             }
    797             if (c != '%') {
    798                 v = getnextarg(args, arglen, &argidx);
    799                 if (v == NULL)
    800                     goto error;
    801             }
    802 
    803             if (fmtcnt < 0) {
    804                 /* last writer: disable writer overallocation */
    805                 writer.overallocate = 0;
    806             }
    807 
    808             sign = 0;
    809             fill = ' ';
    810             switch (c) {
    811             case '%':
    812                 *res++ = '%';
    813                 continue;
    814 
    815             case 'r':
    816                 // %r is only for 2/3 code; 3 only code should use %a
    817             case 'a':
    818                 temp = PyObject_ASCII(v);
    819                 if (temp == NULL)
    820                     goto error;
    821                 assert(PyUnicode_IS_ASCII(temp));
    822                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
    823                 len = PyUnicode_GET_LENGTH(temp);
    824                 if (prec >= 0 && len > prec)
    825                     len = prec;
    826                 break;
    827 
    828             case 's':
    829                 // %s is only for 2/3 code; 3 only code should use %b
    830             case 'b':
    831                 temp = format_obj(v, &pbuf, &len);
    832                 if (temp == NULL)
    833                     goto error;
    834                 if (prec >= 0 && len > prec)
    835                     len = prec;
    836                 break;
    837 
    838             case 'i':
    839             case 'd':
    840             case 'u':
    841             case 'o':
    842             case 'x':
    843             case 'X':
    844                 if (PyLong_CheckExact(v)
    845                     && width == -1 && prec == -1
    846                     && !(flags & (F_SIGN | F_BLANK))
    847                     && c != 'X')
    848                 {
    849                     /* Fast path */
    850                     int alternate = flags & F_ALT;
    851                     int base;
    852 
    853                     switch(c)
    854                     {
    855                         default:
    856                             assert(0 && "'type' not in [diuoxX]");
    857                         case 'd':
    858                         case 'i':
    859                         case 'u':
    860                             base = 10;
    861                             break;
    862                         case 'o':
    863                             base = 8;
    864                             break;
    865                         case 'x':
    866                         case 'X':
    867                             base = 16;
    868                             break;
    869                     }
    870 
    871                     /* Fast path */
    872                     writer.min_size -= 2; /* size preallocated for "%d" */
    873                     res = _PyLong_FormatBytesWriter(&writer, res,
    874                                                     v, base, alternate);
    875                     if (res == NULL)
    876                         goto error;
    877                     continue;
    878                 }
    879 
    880                 temp = formatlong(v, flags, prec, c);
    881                 if (!temp)
    882                     goto error;
    883                 assert(PyUnicode_IS_ASCII(temp));
    884                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
    885                 len = PyUnicode_GET_LENGTH(temp);
    886                 sign = 1;
    887                 if (flags & F_ZERO)
    888                     fill = '0';
    889                 break;
    890 
    891             case 'e':
    892             case 'E':
    893             case 'f':
    894             case 'F':
    895             case 'g':
    896             case 'G':
    897                 if (width == -1 && prec == -1
    898                     && !(flags & (F_SIGN | F_BLANK)))
    899                 {
    900                     /* Fast path */
    901                     writer.min_size -= 2; /* size preallocated for "%f" */
    902                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
    903                     if (res == NULL)
    904                         goto error;
    905                     continue;
    906                 }
    907 
    908                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
    909                     goto error;
    910                 pbuf = PyBytes_AS_STRING(temp);
    911                 len = PyBytes_GET_SIZE(temp);
    912                 sign = 1;
    913                 if (flags & F_ZERO)
    914                     fill = '0';
    915                 break;
    916 
    917             case 'c':
    918                 pbuf = &onechar;
    919                 len = byte_converter(v, &onechar);
    920                 if (!len)
    921                     goto error;
    922                 if (width == -1) {
    923                     /* Fast path */
    924                     *res++ = onechar;
    925                     continue;
    926                 }
    927                 break;
    928 
    929             default:
    930                 PyErr_Format(PyExc_ValueError,
    931                   "unsupported format character '%c' (0x%x) "
    932                   "at index %zd",
    933                   c, c,
    934                   (Py_ssize_t)(fmt - 1 - format));
    935                 goto error;
    936             }
    937 
    938             if (sign) {
    939                 if (*pbuf == '-' || *pbuf == '+') {
    940                     sign = *pbuf++;
    941                     len--;
    942                 }
    943                 else if (flags & F_SIGN)
    944                     sign = '+';
    945                 else if (flags & F_BLANK)
    946                     sign = ' ';
    947                 else
    948                     sign = 0;
    949             }
    950             if (width < len)
    951                 width = len;
    952 
    953             alloc = width;
    954             if (sign != 0 && len == width)
    955                 alloc++;
    956             /* 2: size preallocated for %s */
    957             if (alloc > 2) {
    958                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
    959                 if (res == NULL)
    960                     goto error;
    961             }
    962 #ifdef Py_DEBUG
    963             before = res;
    964 #endif
    965 
    966             /* Write the sign if needed */
    967             if (sign) {
    968                 if (fill != ' ')
    969                     *res++ = sign;
    970                 if (width > len)
    971                     width--;
    972             }
    973 
    974             /* Write the numeric prefix for "x", "X" and "o" formats
    975                if the alternate form is used.
    976                For example, write "0x" for the "%#x" format. */
    977             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
    978                 assert(pbuf[0] == '0');
    979                 assert(pbuf[1] == c);
    980                 if (fill != ' ') {
    981                     *res++ = *pbuf++;
    982                     *res++ = *pbuf++;
    983                 }
    984                 width -= 2;
    985                 if (width < 0)
    986                     width = 0;
    987                 len -= 2;
    988             }
    989 
    990             /* Pad left with the fill character if needed */
    991             if (width > len && !(flags & F_LJUST)) {
    992                 memset(res, fill, width - len);
    993                 res += (width - len);
    994                 width = len;
    995             }
    996 
    997             /* If padding with spaces: write sign if needed and/or numeric
    998                prefix if the alternate form is used */
    999             if (fill == ' ') {
   1000                 if (sign)
   1001                     *res++ = sign;
   1002                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
   1003                     assert(pbuf[0] == '0');
   1004                     assert(pbuf[1] == c);
   1005                     *res++ = *pbuf++;
   1006                     *res++ = *pbuf++;
   1007                 }
   1008             }
   1009 
   1010             /* Copy bytes */
   1011             memcpy(res, pbuf, len);
   1012             res += len;
   1013 
   1014             /* Pad right with the fill character if needed */
   1015             if (width > len) {
   1016                 memset(res, ' ', width - len);
   1017                 res += (width - len);
   1018             }
   1019 
   1020             if (dict && (argidx < arglen) && c != '%') {
   1021                 PyErr_SetString(PyExc_TypeError,
   1022                            "not all arguments converted during bytes formatting");
   1023                 Py_XDECREF(temp);
   1024                 goto error;
   1025             }
   1026             Py_XDECREF(temp);
   1027 
   1028 #ifdef Py_DEBUG
   1029             /* check that we computed the exact size for this write */
   1030             assert((res - before) == alloc);
   1031 #endif
   1032         } /* '%' */
   1033 
   1034         /* If overallocation was disabled, ensure that it was the last
   1035            write. Otherwise, we missed an optimization */
   1036         assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
   1037     } /* until end */
   1038 
   1039     if (argidx < arglen && !dict) {
   1040         PyErr_SetString(PyExc_TypeError,
   1041                         "not all arguments converted during bytes formatting");
   1042         goto error;
   1043     }
   1044 
   1045     if (args_owned) {
   1046         Py_DECREF(args);
   1047     }
   1048     return _PyBytesWriter_Finish(&writer, res);
   1049 
   1050  error:
   1051     _PyBytesWriter_Dealloc(&writer);
   1052     if (args_owned) {
   1053         Py_DECREF(args);
   1054     }
   1055     return NULL;
   1056 }
   1057 
   1058 /* =-= */
   1059 
   1060 static void
   1061 bytes_dealloc(PyObject *op)
   1062 {
   1063     Py_TYPE(op)->tp_free(op);
   1064 }
   1065 
   1066 /* Unescape a backslash-escaped string. If unicode is non-zero,
   1067    the string is a u-literal. If recode_encoding is non-zero,
   1068    the string is UTF-8 encoded and should be re-encoded in the
   1069    specified encoding.  */
   1070 
   1071 static char *
   1072 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
   1073                             const char *errors, const char *recode_encoding,
   1074                             _PyBytesWriter *writer, char *p)
   1075 {
   1076     PyObject *u, *w;
   1077     const char* t;
   1078 
   1079     t = *s;
   1080     /* Decode non-ASCII bytes as UTF-8. */
   1081     while (t < end && (*t & 0x80))
   1082         t++;
   1083     u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
   1084     if (u == NULL)
   1085         return NULL;
   1086 
   1087     /* Recode them in target encoding. */
   1088     w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
   1089     Py_DECREF(u);
   1090     if  (w == NULL)
   1091         return NULL;
   1092     assert(PyBytes_Check(w));
   1093 
   1094     /* Append bytes to output buffer. */
   1095     writer->min_size--;   /* subtract 1 preallocated byte */
   1096     p = _PyBytesWriter_WriteBytes(writer, p,
   1097                                   PyBytes_AS_STRING(w),
   1098                                   PyBytes_GET_SIZE(w));
   1099     Py_DECREF(w);
   1100     if (p == NULL)
   1101         return NULL;
   1102 
   1103     *s = t;
   1104     return p;
   1105 }
   1106 
   1107 PyObject *_PyBytes_DecodeEscape(const char *s,
   1108                                 Py_ssize_t len,
   1109                                 const char *errors,
   1110                                 Py_ssize_t unicode,
   1111                                 const char *recode_encoding,
   1112                                 const char **first_invalid_escape)
   1113 {
   1114     int c;
   1115     char *p;
   1116     const char *end;
   1117     _PyBytesWriter writer;
   1118 
   1119     _PyBytesWriter_Init(&writer);
   1120 
   1121     p = _PyBytesWriter_Alloc(&writer, len);
   1122     if (p == NULL)
   1123         return NULL;
   1124     writer.overallocate = 1;
   1125 
   1126     *first_invalid_escape = NULL;
   1127 
   1128     end = s + len;
   1129     while (s < end) {
   1130         if (*s != '\\') {
   1131           non_esc:
   1132             if (!(recode_encoding && (*s & 0x80))) {
   1133                 *p++ = *s++;
   1134             }
   1135             else {
   1136                 /* non-ASCII character and need to recode */
   1137                 p = _PyBytes_DecodeEscapeRecode(&s, end,
   1138                                                 errors, recode_encoding,
   1139                                                 &writer, p);
   1140                 if (p == NULL)
   1141                     goto failed;
   1142             }
   1143             continue;
   1144         }
   1145 
   1146         s++;
   1147         if (s == end) {
   1148             PyErr_SetString(PyExc_ValueError,
   1149                             "Trailing \\ in string");
   1150             goto failed;
   1151         }
   1152 
   1153         switch (*s++) {
   1154         /* XXX This assumes ASCII! */
   1155         case '\n': break;
   1156         case '\\': *p++ = '\\'; break;
   1157         case '\'': *p++ = '\''; break;
   1158         case '\"': *p++ = '\"'; break;
   1159         case 'b': *p++ = '\b'; break;
   1160         case 'f': *p++ = '\014'; break; /* FF */
   1161         case 't': *p++ = '\t'; break;
   1162         case 'n': *p++ = '\n'; break;
   1163         case 'r': *p++ = '\r'; break;
   1164         case 'v': *p++ = '\013'; break; /* VT */
   1165         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
   1166         case '0': case '1': case '2': case '3':
   1167         case '4': case '5': case '6': case '7':
   1168             c = s[-1] - '0';
   1169             if (s < end && '0' <= *s && *s <= '7') {
   1170                 c = (c<<3) + *s++ - '0';
   1171                 if (s < end && '0' <= *s && *s <= '7')
   1172                     c = (c<<3) + *s++ - '0';
   1173             }
   1174             *p++ = c;
   1175             break;
   1176         case 'x':
   1177             if (s+1 < end) {
   1178                 int digit1, digit2;
   1179                 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
   1180                 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
   1181                 if (digit1 < 16 && digit2 < 16) {
   1182                     *p++ = (unsigned char)((digit1 << 4) + digit2);
   1183                     s += 2;
   1184                     break;
   1185                 }
   1186             }
   1187             /* invalid hexadecimal digits */
   1188 
   1189             if (!errors || strcmp(errors, "strict") == 0) {
   1190                 PyErr_Format(PyExc_ValueError,
   1191                              "invalid \\x escape at position %d",
   1192                              s - 2 - (end - len));
   1193                 goto failed;
   1194             }
   1195             if (strcmp(errors, "replace") == 0) {
   1196                 *p++ = '?';
   1197             } else if (strcmp(errors, "ignore") == 0)
   1198                 /* do nothing */;
   1199             else {
   1200                 PyErr_Format(PyExc_ValueError,
   1201                              "decoding error; unknown "
   1202                              "error handling code: %.400s",
   1203                              errors);
   1204                 goto failed;
   1205             }
   1206             /* skip \x */
   1207             if (s < end && Py_ISXDIGIT(s[0]))
   1208                 s++; /* and a hexdigit */
   1209             break;
   1210 
   1211         default:
   1212             if (*first_invalid_escape == NULL) {
   1213                 *first_invalid_escape = s-1; /* Back up one char, since we've
   1214                                                 already incremented s. */
   1215             }
   1216             *p++ = '\\';
   1217             s--;
   1218             goto non_esc; /* an arbitrary number of unescaped
   1219                              UTF-8 bytes may follow. */
   1220         }
   1221     }
   1222 
   1223     return _PyBytesWriter_Finish(&writer, p);
   1224 
   1225   failed:
   1226     _PyBytesWriter_Dealloc(&writer);
   1227     return NULL;
   1228 }
   1229 
   1230 PyObject *PyBytes_DecodeEscape(const char *s,
   1231                                 Py_ssize_t len,
   1232                                 const char *errors,
   1233                                 Py_ssize_t unicode,
   1234                                 const char *recode_encoding)
   1235 {
   1236     const char* first_invalid_escape;
   1237     PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
   1238                                              recode_encoding,
   1239                                              &first_invalid_escape);
   1240     if (result == NULL)
   1241         return NULL;
   1242     if (first_invalid_escape != NULL) {
   1243         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
   1244                              "invalid escape sequence '\\%c'",
   1245                              *first_invalid_escape) < 0) {
   1246             Py_DECREF(result);
   1247             return NULL;
   1248         }
   1249     }
   1250     return result;
   1251 
   1252 }
   1253 /* -------------------------------------------------------------------- */
   1254 /* object api */
   1255 
   1256 Py_ssize_t
   1257 PyBytes_Size(PyObject *op)
   1258 {
   1259     if (!PyBytes_Check(op)) {
   1260         PyErr_Format(PyExc_TypeError,
   1261              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
   1262         return -1;
   1263     }
   1264     return Py_SIZE(op);
   1265 }
   1266 
   1267 char *
   1268 PyBytes_AsString(PyObject *op)
   1269 {
   1270     if (!PyBytes_Check(op)) {
   1271         PyErr_Format(PyExc_TypeError,
   1272              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
   1273         return NULL;
   1274     }
   1275     return ((PyBytesObject *)op)->ob_sval;
   1276 }
   1277 
   1278 int
   1279 PyBytes_AsStringAndSize(PyObject *obj,
   1280                          char **s,
   1281                          Py_ssize_t *len)
   1282 {
   1283     if (s == NULL) {
   1284         PyErr_BadInternalCall();
   1285         return -1;
   1286     }
   1287 
   1288     if (!PyBytes_Check(obj)) {
   1289         PyErr_Format(PyExc_TypeError,
   1290              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
   1291         return -1;
   1292     }
   1293 
   1294     *s = PyBytes_AS_STRING(obj);
   1295     if (len != NULL)
   1296         *len = PyBytes_GET_SIZE(obj);
   1297     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
   1298         PyErr_SetString(PyExc_ValueError,
   1299                         "embedded null byte");
   1300         return -1;
   1301     }
   1302     return 0;
   1303 }
   1304 
   1305 /* -------------------------------------------------------------------- */
   1306 /* Methods */
   1307 
   1308 #include "stringlib/stringdefs.h"
   1309 
   1310 #include "stringlib/fastsearch.h"
   1311 #include "stringlib/count.h"
   1312 #include "stringlib/find.h"
   1313 #include "stringlib/join.h"
   1314 #include "stringlib/partition.h"
   1315 #include "stringlib/split.h"
   1316 #include "stringlib/ctype.h"
   1317 
   1318 #include "stringlib/transmogrify.h"
   1319 
   1320 PyObject *
   1321 PyBytes_Repr(PyObject *obj, int smartquotes)
   1322 {
   1323     PyBytesObject* op = (PyBytesObject*) obj;
   1324     Py_ssize_t i, length = Py_SIZE(op);
   1325     Py_ssize_t newsize, squotes, dquotes;
   1326     PyObject *v;
   1327     unsigned char quote, *s, *p;
   1328 
   1329     /* Compute size of output string */
   1330     squotes = dquotes = 0;
   1331     newsize = 3; /* b'' */
   1332     s = (unsigned char*)op->ob_sval;
   1333     for (i = 0; i < length; i++) {
   1334         Py_ssize_t incr = 1;
   1335         switch(s[i]) {
   1336         case '\'': squotes++; break;
   1337         case '"':  dquotes++; break;
   1338         case '\\': case '\t': case '\n': case '\r':
   1339             incr = 2; break; /* \C */
   1340         default:
   1341             if (s[i] < ' ' || s[i] >= 0x7f)
   1342                 incr = 4; /* \xHH */
   1343         }
   1344         if (newsize > PY_SSIZE_T_MAX - incr)
   1345             goto overflow;
   1346         newsize += incr;
   1347     }
   1348     quote = '\'';
   1349     if (smartquotes && squotes && !dquotes)
   1350         quote = '"';
   1351     if (squotes && quote == '\'') {
   1352         if (newsize > PY_SSIZE_T_MAX - squotes)
   1353             goto overflow;
   1354         newsize += squotes;
   1355     }
   1356 
   1357     v = PyUnicode_New(newsize, 127);
   1358     if (v == NULL) {
   1359         return NULL;
   1360     }
   1361     p = PyUnicode_1BYTE_DATA(v);
   1362 
   1363     *p++ = 'b', *p++ = quote;
   1364     for (i = 0; i < length; i++) {
   1365         unsigned char c = op->ob_sval[i];
   1366         if (c == quote || c == '\\')
   1367             *p++ = '\\', *p++ = c;
   1368         else if (c == '\t')
   1369             *p++ = '\\', *p++ = 't';
   1370         else if (c == '\n')
   1371             *p++ = '\\', *p++ = 'n';
   1372         else if (c == '\r')
   1373             *p++ = '\\', *p++ = 'r';
   1374         else if (c < ' ' || c >= 0x7f) {
   1375             *p++ = '\\';
   1376             *p++ = 'x';
   1377             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
   1378             *p++ = Py_hexdigits[c & 0xf];
   1379         }
   1380         else
   1381             *p++ = c;
   1382     }
   1383     *p++ = quote;
   1384     assert(_PyUnicode_CheckConsistency(v, 1));
   1385     return v;
   1386 
   1387   overflow:
   1388     PyErr_SetString(PyExc_OverflowError,
   1389                     "bytes object is too large to make repr");
   1390     return NULL;
   1391 }
   1392 
   1393 static PyObject *
   1394 bytes_repr(PyObject *op)
   1395 {
   1396     return PyBytes_Repr(op, 1);
   1397 }
   1398 
   1399 static PyObject *
   1400 bytes_str(PyObject *op)
   1401 {
   1402     if (Py_BytesWarningFlag) {
   1403         if (PyErr_WarnEx(PyExc_BytesWarning,
   1404                          "str() on a bytes instance", 1))
   1405             return NULL;
   1406     }
   1407     return bytes_repr(op);
   1408 }
   1409 
   1410 static Py_ssize_t
   1411 bytes_length(PyBytesObject *a)
   1412 {
   1413     return Py_SIZE(a);
   1414 }
   1415 
   1416 /* This is also used by PyBytes_Concat() */
   1417 static PyObject *
   1418 bytes_concat(PyObject *a, PyObject *b)
   1419 {
   1420     Py_buffer va, vb;
   1421     PyObject *result = NULL;
   1422 
   1423     va.len = -1;
   1424     vb.len = -1;
   1425     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
   1426         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
   1427         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
   1428                      Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
   1429         goto done;
   1430     }
   1431 
   1432     /* Optimize end cases */
   1433     if (va.len == 0 && PyBytes_CheckExact(b)) {
   1434         result = b;
   1435         Py_INCREF(result);
   1436         goto done;
   1437     }
   1438     if (vb.len == 0 && PyBytes_CheckExact(a)) {
   1439         result = a;
   1440         Py_INCREF(result);
   1441         goto done;
   1442     }
   1443 
   1444     if (va.len > PY_SSIZE_T_MAX - vb.len) {
   1445         PyErr_NoMemory();
   1446         goto done;
   1447     }
   1448 
   1449     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
   1450     if (result != NULL) {
   1451         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
   1452         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
   1453     }
   1454 
   1455   done:
   1456     if (va.len != -1)
   1457         PyBuffer_Release(&va);
   1458     if (vb.len != -1)
   1459         PyBuffer_Release(&vb);
   1460     return result;
   1461 }
   1462 
   1463 static PyObject *
   1464 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
   1465 {
   1466     Py_ssize_t i;
   1467     Py_ssize_t j;
   1468     Py_ssize_t size;
   1469     PyBytesObject *op;
   1470     size_t nbytes;
   1471     if (n < 0)
   1472         n = 0;
   1473     /* watch out for overflows:  the size can overflow int,
   1474      * and the # of bytes needed can overflow size_t
   1475      */
   1476     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
   1477         PyErr_SetString(PyExc_OverflowError,
   1478             "repeated bytes are too long");
   1479         return NULL;
   1480     }
   1481     size = Py_SIZE(a) * n;
   1482     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
   1483         Py_INCREF(a);
   1484         return (PyObject *)a;
   1485     }
   1486     nbytes = (size_t)size;
   1487     if (nbytes + PyBytesObject_SIZE <= nbytes) {
   1488         PyErr_SetString(PyExc_OverflowError,
   1489             "repeated bytes are too long");
   1490         return NULL;
   1491     }
   1492     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
   1493     if (op == NULL)
   1494         return PyErr_NoMemory();
   1495     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
   1496     op->ob_shash = -1;
   1497     op->ob_sval[size] = '\0';
   1498     if (Py_SIZE(a) == 1 && n > 0) {
   1499         memset(op->ob_sval, a->ob_sval[0] , n);
   1500         return (PyObject *) op;
   1501     }
   1502     i = 0;
   1503     if (i < size) {
   1504         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
   1505         i = Py_SIZE(a);
   1506     }
   1507     while (i < size) {
   1508         j = (i <= size-i)  ?  i  :  size-i;
   1509         memcpy(op->ob_sval+i, op->ob_sval, j);
   1510         i += j;
   1511     }
   1512     return (PyObject *) op;
   1513 }
   1514 
   1515 static int
   1516 bytes_contains(PyObject *self, PyObject *arg)
   1517 {
   1518     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
   1519 }
   1520 
   1521 static PyObject *
   1522 bytes_item(PyBytesObject *a, Py_ssize_t i)
   1523 {
   1524     if (i < 0 || i >= Py_SIZE(a)) {
   1525         PyErr_SetString(PyExc_IndexError, "index out of range");
   1526         return NULL;
   1527     }
   1528     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
   1529 }
   1530 
   1531 static int
   1532 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
   1533 {
   1534     int cmp;
   1535     Py_ssize_t len;
   1536 
   1537     len = Py_SIZE(a);
   1538     if (Py_SIZE(b) != len)
   1539         return 0;
   1540 
   1541     if (a->ob_sval[0] != b->ob_sval[0])
   1542         return 0;
   1543 
   1544     cmp = memcmp(a->ob_sval, b->ob_sval, len);
   1545     return (cmp == 0);
   1546 }
   1547 
   1548 static PyObject*
   1549 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
   1550 {
   1551     int c;
   1552     Py_ssize_t len_a, len_b;
   1553     Py_ssize_t min_len;
   1554     PyObject *result;
   1555     int rc;
   1556 
   1557     /* Make sure both arguments are strings. */
   1558     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
   1559         if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
   1560             rc = PyObject_IsInstance((PyObject*)a,
   1561                                      (PyObject*)&PyUnicode_Type);
   1562             if (!rc)
   1563                 rc = PyObject_IsInstance((PyObject*)b,
   1564                                          (PyObject*)&PyUnicode_Type);
   1565             if (rc < 0)
   1566                 return NULL;
   1567             if (rc) {
   1568                 if (PyErr_WarnEx(PyExc_BytesWarning,
   1569                                  "Comparison between bytes and string", 1))
   1570                     return NULL;
   1571             }
   1572             else {
   1573                 rc = PyObject_IsInstance((PyObject*)a,
   1574                                          (PyObject*)&PyLong_Type);
   1575                 if (!rc)
   1576                     rc = PyObject_IsInstance((PyObject*)b,
   1577                                              (PyObject*)&PyLong_Type);
   1578                 if (rc < 0)
   1579                     return NULL;
   1580                 if (rc) {
   1581                     if (PyErr_WarnEx(PyExc_BytesWarning,
   1582                                      "Comparison between bytes and int", 1))
   1583                         return NULL;
   1584                 }
   1585             }
   1586         }
   1587         result = Py_NotImplemented;
   1588     }
   1589     else if (a == b) {
   1590         switch (op) {
   1591         case Py_EQ:
   1592         case Py_LE:
   1593         case Py_GE:
   1594             /* a string is equal to itself */
   1595             result = Py_True;
   1596             break;
   1597         case Py_NE:
   1598         case Py_LT:
   1599         case Py_GT:
   1600             result = Py_False;
   1601             break;
   1602         default:
   1603             PyErr_BadArgument();
   1604             return NULL;
   1605         }
   1606     }
   1607     else if (op == Py_EQ || op == Py_NE) {
   1608         int eq = bytes_compare_eq(a, b);
   1609         eq ^= (op == Py_NE);
   1610         result = eq ? Py_True : Py_False;
   1611     }
   1612     else {
   1613         len_a = Py_SIZE(a);
   1614         len_b = Py_SIZE(b);
   1615         min_len = Py_MIN(len_a, len_b);
   1616         if (min_len > 0) {
   1617             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
   1618             if (c == 0)
   1619                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
   1620         }
   1621         else
   1622             c = 0;
   1623         if (c == 0)
   1624             c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
   1625         switch (op) {
   1626         case Py_LT: c = c <  0; break;
   1627         case Py_LE: c = c <= 0; break;
   1628         case Py_GT: c = c >  0; break;
   1629         case Py_GE: c = c >= 0; break;
   1630         default:
   1631             PyErr_BadArgument();
   1632             return NULL;
   1633         }
   1634         result = c ? Py_True : Py_False;
   1635     }
   1636 
   1637     Py_INCREF(result);
   1638     return result;
   1639 }
   1640 
   1641 static Py_hash_t
   1642 bytes_hash(PyBytesObject *a)
   1643 {
   1644     if (a->ob_shash == -1) {
   1645         /* Can't fail */
   1646         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
   1647     }
   1648     return a->ob_shash;
   1649 }
   1650 
   1651 static PyObject*
   1652 bytes_subscript(PyBytesObject* self, PyObject* item)
   1653 {
   1654     if (PyIndex_Check(item)) {
   1655         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
   1656         if (i == -1 && PyErr_Occurred())
   1657             return NULL;
   1658         if (i < 0)
   1659             i += PyBytes_GET_SIZE(self);
   1660         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
   1661             PyErr_SetString(PyExc_IndexError,
   1662                             "index out of range");
   1663             return NULL;
   1664         }
   1665         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
   1666     }
   1667     else if (PySlice_Check(item)) {
   1668         Py_ssize_t start, stop, step, slicelength, cur, i;
   1669         char* source_buf;
   1670         char* result_buf;
   1671         PyObject* result;
   1672 
   1673         if (PySlice_GetIndicesEx(item,
   1674                          PyBytes_GET_SIZE(self),
   1675                          &start, &stop, &step, &slicelength) < 0) {
   1676             return NULL;
   1677         }
   1678 
   1679         if (slicelength <= 0) {
   1680             return PyBytes_FromStringAndSize("", 0);
   1681         }
   1682         else if (start == 0 && step == 1 &&
   1683                  slicelength == PyBytes_GET_SIZE(self) &&
   1684                  PyBytes_CheckExact(self)) {
   1685             Py_INCREF(self);
   1686             return (PyObject *)self;
   1687         }
   1688         else if (step == 1) {
   1689             return PyBytes_FromStringAndSize(
   1690                 PyBytes_AS_STRING(self) + start,
   1691                 slicelength);
   1692         }
   1693         else {
   1694             source_buf = PyBytes_AS_STRING(self);
   1695             result = PyBytes_FromStringAndSize(NULL, slicelength);
   1696             if (result == NULL)
   1697                 return NULL;
   1698 
   1699             result_buf = PyBytes_AS_STRING(result);
   1700             for (cur = start, i = 0; i < slicelength;
   1701                  cur += step, i++) {
   1702                 result_buf[i] = source_buf[cur];
   1703             }
   1704 
   1705             return result;
   1706         }
   1707     }
   1708     else {
   1709         PyErr_Format(PyExc_TypeError,
   1710                      "byte indices must be integers or slices, not %.200s",
   1711                      Py_TYPE(item)->tp_name);
   1712         return NULL;
   1713     }
   1714 }
   1715 
   1716 static int
   1717 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
   1718 {
   1719     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
   1720                              1, flags);
   1721 }
   1722 
   1723 static PySequenceMethods bytes_as_sequence = {
   1724     (lenfunc)bytes_length, /*sq_length*/
   1725     (binaryfunc)bytes_concat, /*sq_concat*/
   1726     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
   1727     (ssizeargfunc)bytes_item, /*sq_item*/
   1728     0,                  /*sq_slice*/
   1729     0,                  /*sq_ass_item*/
   1730     0,                  /*sq_ass_slice*/
   1731     (objobjproc)bytes_contains /*sq_contains*/
   1732 };
   1733 
   1734 static PyMappingMethods bytes_as_mapping = {
   1735     (lenfunc)bytes_length,
   1736     (binaryfunc)bytes_subscript,
   1737     0,
   1738 };
   1739 
   1740 static PyBufferProcs bytes_as_buffer = {
   1741     (getbufferproc)bytes_buffer_getbuffer,
   1742     NULL,
   1743 };
   1744 
   1745 
   1746 #define LEFTSTRIP 0
   1747 #define RIGHTSTRIP 1
   1748 #define BOTHSTRIP 2
   1749 
   1750 /*[clinic input]
   1751 bytes.split
   1752 
   1753     sep: object = None
   1754         The delimiter according to which to split the bytes.
   1755         None (the default value) means split on ASCII whitespace characters
   1756         (space, tab, return, newline, formfeed, vertical tab).
   1757     maxsplit: Py_ssize_t = -1
   1758         Maximum number of splits to do.
   1759         -1 (the default value) means no limit.
   1760 
   1761 Return a list of the sections in the bytes, using sep as the delimiter.
   1762 [clinic start generated code]*/
   1763 
   1764 static PyObject *
   1765 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
   1766 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
   1767 {
   1768     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
   1769     const char *s = PyBytes_AS_STRING(self), *sub;
   1770     Py_buffer vsub;
   1771     PyObject *list;
   1772 
   1773     if (maxsplit < 0)
   1774         maxsplit = PY_SSIZE_T_MAX;
   1775     if (sep == Py_None)
   1776         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
   1777     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
   1778         return NULL;
   1779     sub = vsub.buf;
   1780     n = vsub.len;
   1781 
   1782     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
   1783     PyBuffer_Release(&vsub);
   1784     return list;
   1785 }
   1786 
   1787 /*[clinic input]
   1788 bytes.partition
   1789 
   1790     sep: Py_buffer
   1791     /
   1792 
   1793 Partition the bytes into three parts using the given separator.
   1794 
   1795 This will search for the separator sep in the bytes. If the separator is found,
   1796 returns a 3-tuple containing the part before the separator, the separator
   1797 itself, and the part after it.
   1798 
   1799 If the separator is not found, returns a 3-tuple containing the original bytes
   1800 object and two empty bytes objects.
   1801 [clinic start generated code]*/
   1802 
   1803 static PyObject *
   1804 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
   1805 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
   1806 {
   1807     return stringlib_partition(
   1808         (PyObject*) self,
   1809         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
   1810         sep->obj, (const char *)sep->buf, sep->len
   1811         );
   1812 }
   1813 
   1814 /*[clinic input]
   1815 bytes.rpartition
   1816 
   1817     sep: Py_buffer
   1818     /
   1819 
   1820 Partition the bytes into three parts using the given separator.
   1821 
   1822 This will search for the separator sep in the bytes, starting at the end. If
   1823 the separator is found, returns a 3-tuple containing the part before the
   1824 separator, the separator itself, and the part after it.
   1825 
   1826 If the separator is not found, returns a 3-tuple containing two empty bytes
   1827 objects and the original bytes object.
   1828 [clinic start generated code]*/
   1829 
   1830 static PyObject *
   1831 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
   1832 /*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
   1833 {
   1834     return stringlib_rpartition(
   1835         (PyObject*) self,
   1836         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
   1837         sep->obj, (const char *)sep->buf, sep->len
   1838         );
   1839 }
   1840 
   1841 /*[clinic input]
   1842 bytes.rsplit = bytes.split
   1843 
   1844 Return a list of the sections in the bytes, using sep as the delimiter.
   1845 
   1846 Splitting is done starting at the end of the bytes and working to the front.
   1847 [clinic start generated code]*/
   1848 
   1849 static PyObject *
   1850 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
   1851 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
   1852 {
   1853     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
   1854     const char *s = PyBytes_AS_STRING(self), *sub;
   1855     Py_buffer vsub;
   1856     PyObject *list;
   1857 
   1858     if (maxsplit < 0)
   1859         maxsplit = PY_SSIZE_T_MAX;
   1860     if (sep == Py_None)
   1861         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
   1862     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
   1863         return NULL;
   1864     sub = vsub.buf;
   1865     n = vsub.len;
   1866 
   1867     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
   1868     PyBuffer_Release(&vsub);
   1869     return list;
   1870 }
   1871 
   1872 
   1873 /*[clinic input]
   1874 bytes.join
   1875 
   1876     iterable_of_bytes: object
   1877     /
   1878 
   1879 Concatenate any number of bytes objects.
   1880 
   1881 The bytes whose method is called is inserted in between each pair.
   1882 
   1883 The result is returned as a new bytes object.
   1884 
   1885 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
   1886 [clinic start generated code]*/
   1887 
   1888 static PyObject *
   1889 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
   1890 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
   1891 {
   1892     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
   1893 }
   1894 
   1895 PyObject *
   1896 _PyBytes_Join(PyObject *sep, PyObject *x)
   1897 {
   1898     assert(sep != NULL && PyBytes_Check(sep));
   1899     assert(x != NULL);
   1900     return bytes_join((PyBytesObject*)sep, x);
   1901 }
   1902 
   1903 static PyObject *
   1904 bytes_find(PyBytesObject *self, PyObject *args)
   1905 {
   1906     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1907 }
   1908 
   1909 static PyObject *
   1910 bytes_index(PyBytesObject *self, PyObject *args)
   1911 {
   1912     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1913 }
   1914 
   1915 
   1916 static PyObject *
   1917 bytes_rfind(PyBytesObject *self, PyObject *args)
   1918 {
   1919     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1920 }
   1921 
   1922 
   1923 static PyObject *
   1924 bytes_rindex(PyBytesObject *self, PyObject *args)
   1925 {
   1926     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   1927 }
   1928 
   1929 
   1930 Py_LOCAL_INLINE(PyObject *)
   1931 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
   1932 {
   1933     Py_buffer vsep;
   1934     char *s = PyBytes_AS_STRING(self);
   1935     Py_ssize_t len = PyBytes_GET_SIZE(self);
   1936     char *sep;
   1937     Py_ssize_t seplen;
   1938     Py_ssize_t i, j;
   1939 
   1940     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
   1941         return NULL;
   1942     sep = vsep.buf;
   1943     seplen = vsep.len;
   1944 
   1945     i = 0;
   1946     if (striptype != RIGHTSTRIP) {
   1947         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
   1948             i++;
   1949         }
   1950     }
   1951 
   1952     j = len;
   1953     if (striptype != LEFTSTRIP) {
   1954         do {
   1955             j--;
   1956         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
   1957         j++;
   1958     }
   1959 
   1960     PyBuffer_Release(&vsep);
   1961 
   1962     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
   1963         Py_INCREF(self);
   1964         return (PyObject*)self;
   1965     }
   1966     else
   1967         return PyBytes_FromStringAndSize(s+i, j-i);
   1968 }
   1969 
   1970 
   1971 Py_LOCAL_INLINE(PyObject *)
   1972 do_strip(PyBytesObject *self, int striptype)
   1973 {
   1974     char *s = PyBytes_AS_STRING(self);
   1975     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
   1976 
   1977     i = 0;
   1978     if (striptype != RIGHTSTRIP) {
   1979         while (i < len && Py_ISSPACE(s[i])) {
   1980             i++;
   1981         }
   1982     }
   1983 
   1984     j = len;
   1985     if (striptype != LEFTSTRIP) {
   1986         do {
   1987             j--;
   1988         } while (j >= i && Py_ISSPACE(s[j]));
   1989         j++;
   1990     }
   1991 
   1992     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
   1993         Py_INCREF(self);
   1994         return (PyObject*)self;
   1995     }
   1996     else
   1997         return PyBytes_FromStringAndSize(s+i, j-i);
   1998 }
   1999 
   2000 
   2001 Py_LOCAL_INLINE(PyObject *)
   2002 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
   2003 {
   2004     if (bytes != NULL && bytes != Py_None) {
   2005         return do_xstrip(self, striptype, bytes);
   2006     }
   2007     return do_strip(self, striptype);
   2008 }
   2009 
   2010 /*[clinic input]
   2011 bytes.strip
   2012 
   2013     bytes: object = None
   2014     /
   2015 
   2016 Strip leading and trailing bytes contained in the argument.
   2017 
   2018 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
   2019 [clinic start generated code]*/
   2020 
   2021 static PyObject *
   2022 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
   2023 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
   2024 {
   2025     return do_argstrip(self, BOTHSTRIP, bytes);
   2026 }
   2027 
   2028 /*[clinic input]
   2029 bytes.lstrip
   2030 
   2031     bytes: object = None
   2032     /
   2033 
   2034 Strip leading bytes contained in the argument.
   2035 
   2036 If the argument is omitted or None, strip leading  ASCII whitespace.
   2037 [clinic start generated code]*/
   2038 
   2039 static PyObject *
   2040 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
   2041 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
   2042 {
   2043     return do_argstrip(self, LEFTSTRIP, bytes);
   2044 }
   2045 
   2046 /*[clinic input]
   2047 bytes.rstrip
   2048 
   2049     bytes: object = None
   2050     /
   2051 
   2052 Strip trailing bytes contained in the argument.
   2053 
   2054 If the argument is omitted or None, strip trailing ASCII whitespace.
   2055 [clinic start generated code]*/
   2056 
   2057 static PyObject *
   2058 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
   2059 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
   2060 {
   2061     return do_argstrip(self, RIGHTSTRIP, bytes);
   2062 }
   2063 
   2064 
   2065 static PyObject *
   2066 bytes_count(PyBytesObject *self, PyObject *args)
   2067 {
   2068     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   2069 }
   2070 
   2071 
   2072 /*[clinic input]
   2073 bytes.translate
   2074 
   2075     table: object
   2076         Translation table, which must be a bytes object of length 256.
   2077     /
   2078     delete as deletechars: object(c_default="NULL") = b''
   2079 
   2080 Return a copy with each character mapped by the given translation table.
   2081 
   2082 All characters occurring in the optional argument delete are removed.
   2083 The remaining characters are mapped through the given translation table.
   2084 [clinic start generated code]*/
   2085 
   2086 static PyObject *
   2087 bytes_translate_impl(PyBytesObject *self, PyObject *table,
   2088                      PyObject *deletechars)
   2089 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
   2090 {
   2091     char *input, *output;
   2092     Py_buffer table_view = {NULL, NULL};
   2093     Py_buffer del_table_view = {NULL, NULL};
   2094     const char *table_chars;
   2095     Py_ssize_t i, c, changed = 0;
   2096     PyObject *input_obj = (PyObject*)self;
   2097     const char *output_start, *del_table_chars=NULL;
   2098     Py_ssize_t inlen, tablen, dellen = 0;
   2099     PyObject *result;
   2100     int trans_table[256];
   2101 
   2102     if (PyBytes_Check(table)) {
   2103         table_chars = PyBytes_AS_STRING(table);
   2104         tablen = PyBytes_GET_SIZE(table);
   2105     }
   2106     else if (table == Py_None) {
   2107         table_chars = NULL;
   2108         tablen = 256;
   2109     }
   2110     else {
   2111         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
   2112             return NULL;
   2113         table_chars = table_view.buf;
   2114         tablen = table_view.len;
   2115     }
   2116 
   2117     if (tablen != 256) {
   2118         PyErr_SetString(PyExc_ValueError,
   2119           "translation table must be 256 characters long");
   2120         PyBuffer_Release(&table_view);
   2121         return NULL;
   2122     }
   2123 
   2124     if (deletechars != NULL) {
   2125         if (PyBytes_Check(deletechars)) {
   2126             del_table_chars = PyBytes_AS_STRING(deletechars);
   2127             dellen = PyBytes_GET_SIZE(deletechars);
   2128         }
   2129         else {
   2130             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
   2131                 PyBuffer_Release(&table_view);
   2132                 return NULL;
   2133             }
   2134             del_table_chars = del_table_view.buf;
   2135             dellen = del_table_view.len;
   2136         }
   2137     }
   2138     else {
   2139         del_table_chars = NULL;
   2140         dellen = 0;
   2141     }
   2142 
   2143     inlen = PyBytes_GET_SIZE(input_obj);
   2144     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
   2145     if (result == NULL) {
   2146         PyBuffer_Release(&del_table_view);
   2147         PyBuffer_Release(&table_view);
   2148         return NULL;
   2149     }
   2150     output_start = output = PyBytes_AS_STRING(result);
   2151     input = PyBytes_AS_STRING(input_obj);
   2152 
   2153     if (dellen == 0 && table_chars != NULL) {
   2154         /* If no deletions are required, use faster code */
   2155         for (i = inlen; --i >= 0; ) {
   2156             c = Py_CHARMASK(*input++);
   2157             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
   2158                 changed = 1;
   2159         }
   2160         if (!changed && PyBytes_CheckExact(input_obj)) {
   2161             Py_INCREF(input_obj);
   2162             Py_DECREF(result);
   2163             result = input_obj;
   2164         }
   2165         PyBuffer_Release(&del_table_view);
   2166         PyBuffer_Release(&table_view);
   2167         return result;
   2168     }
   2169 
   2170     if (table_chars == NULL) {
   2171         for (i = 0; i < 256; i++)
   2172             trans_table[i] = Py_CHARMASK(i);
   2173     } else {
   2174         for (i = 0; i < 256; i++)
   2175             trans_table[i] = Py_CHARMASK(table_chars[i]);
   2176     }
   2177     PyBuffer_Release(&table_view);
   2178 
   2179     for (i = 0; i < dellen; i++)
   2180         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
   2181     PyBuffer_Release(&del_table_view);
   2182 
   2183     for (i = inlen; --i >= 0; ) {
   2184         c = Py_CHARMASK(*input++);
   2185         if (trans_table[c] != -1)
   2186             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
   2187                 continue;
   2188         changed = 1;
   2189     }
   2190     if (!changed && PyBytes_CheckExact(input_obj)) {
   2191         Py_DECREF(result);
   2192         Py_INCREF(input_obj);
   2193         return input_obj;
   2194     }
   2195     /* Fix the size of the resulting string */
   2196     if (inlen > 0)
   2197         _PyBytes_Resize(&result, output - output_start);
   2198     return result;
   2199 }
   2200 
   2201 
   2202 /*[clinic input]
   2203 
   2204 @staticmethod
   2205 bytes.maketrans
   2206 
   2207     frm: Py_buffer
   2208     to: Py_buffer
   2209     /
   2210 
   2211 Return a translation table useable for the bytes or bytearray translate method.
   2212 
   2213 The returned table will be one where each byte in frm is mapped to the byte at
   2214 the same position in to.
   2215 
   2216 The bytes objects frm and to must be of the same length.
   2217 [clinic start generated code]*/
   2218 
   2219 static PyObject *
   2220 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
   2221 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
   2222 {
   2223     return _Py_bytes_maketrans(frm, to);
   2224 }
   2225 
   2226 
   2227 /*[clinic input]
   2228 bytes.replace
   2229 
   2230     old: Py_buffer
   2231     new: Py_buffer
   2232     count: Py_ssize_t = -1
   2233         Maximum number of occurrences to replace.
   2234         -1 (the default value) means replace all occurrences.
   2235     /
   2236 
   2237 Return a copy with all occurrences of substring old replaced by new.
   2238 
   2239 If the optional argument count is given, only the first count occurrences are
   2240 replaced.
   2241 [clinic start generated code]*/
   2242 
   2243 static PyObject *
   2244 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
   2245                    Py_ssize_t count)
   2246 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
   2247 {
   2248     return stringlib_replace((PyObject *)self,
   2249                              (const char *)old->buf, old->len,
   2250                              (const char *)new->buf, new->len, count);
   2251 }
   2252 
   2253 /** End DALKE **/
   2254 
   2255 
   2256 static PyObject *
   2257 bytes_startswith(PyBytesObject *self, PyObject *args)
   2258 {
   2259     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   2260 }
   2261 
   2262 static PyObject *
   2263 bytes_endswith(PyBytesObject *self, PyObject *args)
   2264 {
   2265     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
   2266 }
   2267 
   2268 
   2269 /*[clinic input]
   2270 bytes.decode
   2271 
   2272     encoding: str(c_default="NULL") = 'utf-8'
   2273         The encoding with which to decode the bytes.
   2274     errors: str(c_default="NULL") = 'strict'
   2275         The error handling scheme to use for the handling of decoding errors.
   2276         The default is 'strict' meaning that decoding errors raise a
   2277         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
   2278         as well as any other name registered with codecs.register_error that
   2279         can handle UnicodeDecodeErrors.
   2280 
   2281 Decode the bytes using the codec registered for encoding.
   2282 [clinic start generated code]*/
   2283 
   2284 static PyObject *
   2285 bytes_decode_impl(PyBytesObject *self, const char *encoding,
   2286                   const char *errors)
   2287 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
   2288 {
   2289     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
   2290 }
   2291 
   2292 
   2293 /*[clinic input]
   2294 bytes.splitlines
   2295 
   2296     keepends: int(c_default="0") = False
   2297 
   2298 Return a list of the lines in the bytes, breaking at line boundaries.
   2299 
   2300 Line breaks are not included in the resulting list unless keepends is given and
   2301 true.
   2302 [clinic start generated code]*/
   2303 
   2304 static PyObject *
   2305 bytes_splitlines_impl(PyBytesObject *self, int keepends)
   2306 /*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
   2307 {
   2308     return stringlib_splitlines(
   2309         (PyObject*) self, PyBytes_AS_STRING(self),
   2310         PyBytes_GET_SIZE(self), keepends
   2311         );
   2312 }
   2313 
   2314 /*[clinic input]
   2315 @classmethod
   2316 bytes.fromhex
   2317 
   2318     string: unicode
   2319     /
   2320 
   2321 Create a bytes object from a string of hexadecimal numbers.
   2322 
   2323 Spaces between two numbers are accepted.
   2324 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
   2325 [clinic start generated code]*/
   2326 
   2327 static PyObject *
   2328 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
   2329 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
   2330 {
   2331     PyObject *result = _PyBytes_FromHex(string, 0);
   2332     if (type != &PyBytes_Type && result != NULL) {
   2333         Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
   2334                                                        result, NULL));
   2335     }
   2336     return result;
   2337 }
   2338 
   2339 PyObject*
   2340 _PyBytes_FromHex(PyObject *string, int use_bytearray)
   2341 {
   2342     char *buf;
   2343     Py_ssize_t hexlen, invalid_char;
   2344     unsigned int top, bot;
   2345     Py_UCS1 *str, *end;
   2346     _PyBytesWriter writer;
   2347 
   2348     _PyBytesWriter_Init(&writer);
   2349     writer.use_bytearray = use_bytearray;
   2350 
   2351     assert(PyUnicode_Check(string));
   2352     if (PyUnicode_READY(string))
   2353         return NULL;
   2354     hexlen = PyUnicode_GET_LENGTH(string);
   2355 
   2356     if (!PyUnicode_IS_ASCII(string)) {
   2357         void *data = PyUnicode_DATA(string);
   2358         unsigned int kind = PyUnicode_KIND(string);
   2359         Py_ssize_t i;
   2360 
   2361         /* search for the first non-ASCII character */
   2362         for (i = 0; i < hexlen; i++) {
   2363             if (PyUnicode_READ(kind, data, i) >= 128)
   2364                 break;
   2365         }
   2366         invalid_char = i;
   2367         goto error;
   2368     }
   2369 
   2370     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
   2371     str = PyUnicode_1BYTE_DATA(string);
   2372 
   2373     /* This overestimates if there are spaces */
   2374     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
   2375     if (buf == NULL)
   2376         return NULL;
   2377 
   2378     end = str + hexlen;
   2379     while (str < end) {
   2380         /* skip over spaces in the input */
   2381         if (*str == ' ') {
   2382             do {
   2383                 str++;
   2384             } while (*str == ' ');
   2385             if (str >= end)
   2386                 break;
   2387         }
   2388 
   2389         top = _PyLong_DigitValue[*str];
   2390         if (top >= 16) {
   2391             invalid_char = str - PyUnicode_1BYTE_DATA(string);
   2392             goto error;
   2393         }
   2394         str++;
   2395 
   2396         bot = _PyLong_DigitValue[*str];
   2397         if (bot >= 16) {
   2398             invalid_char = str - PyUnicode_1BYTE_DATA(string);
   2399             goto error;
   2400         }
   2401         str++;
   2402 
   2403         *buf++ = (unsigned char)((top << 4) + bot);
   2404     }
   2405 
   2406     return _PyBytesWriter_Finish(&writer, buf);
   2407 
   2408   error:
   2409     PyErr_Format(PyExc_ValueError,
   2410                  "non-hexadecimal number found in "
   2411                  "fromhex() arg at position %zd", invalid_char);
   2412     _PyBytesWriter_Dealloc(&writer);
   2413     return NULL;
   2414 }
   2415 
   2416 PyDoc_STRVAR(hex__doc__,
   2417 "B.hex() -> string\n\
   2418 \n\
   2419 Create a string of hexadecimal numbers from a bytes object.\n\
   2420 Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
   2421 
   2422 static PyObject *
   2423 bytes_hex(PyBytesObject *self)
   2424 {
   2425     char* argbuf = PyBytes_AS_STRING(self);
   2426     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
   2427     return _Py_strhex(argbuf, arglen);
   2428 }
   2429 
   2430 static PyObject *
   2431 bytes_getnewargs(PyBytesObject *v)
   2432 {
   2433     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
   2434 }
   2435 
   2436 
   2437 static PyMethodDef
   2438 bytes_methods[] = {
   2439     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
   2440     {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
   2441      _Py_capitalize__doc__},
   2442     {"center", (PyCFunction)stringlib_center, METH_VARARGS,
   2443      _Py_center__doc__},
   2444     {"count", (PyCFunction)bytes_count, METH_VARARGS,
   2445      _Py_count__doc__},
   2446     BYTES_DECODE_METHODDEF
   2447     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
   2448      _Py_endswith__doc__},
   2449     {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
   2450      _Py_expandtabs__doc__},
   2451     {"find", (PyCFunction)bytes_find, METH_VARARGS,
   2452      _Py_find__doc__},
   2453     BYTES_FROMHEX_METHODDEF
   2454     {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
   2455     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
   2456     {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
   2457      _Py_isalnum__doc__},
   2458     {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
   2459      _Py_isalpha__doc__},
   2460     {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
   2461      _Py_isdigit__doc__},
   2462     {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
   2463      _Py_islower__doc__},
   2464     {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
   2465      _Py_isspace__doc__},
   2466     {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
   2467      _Py_istitle__doc__},
   2468     {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
   2469      _Py_isupper__doc__},
   2470     BYTES_JOIN_METHODDEF
   2471     {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
   2472     {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
   2473     BYTES_LSTRIP_METHODDEF
   2474     BYTES_MAKETRANS_METHODDEF
   2475     BYTES_PARTITION_METHODDEF
   2476     BYTES_REPLACE_METHODDEF
   2477     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
   2478     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
   2479     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
   2480     BYTES_RPARTITION_METHODDEF
   2481     BYTES_RSPLIT_METHODDEF
   2482     BYTES_RSTRIP_METHODDEF
   2483     BYTES_SPLIT_METHODDEF
   2484     BYTES_SPLITLINES_METHODDEF
   2485     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
   2486      _Py_startswith__doc__},
   2487     BYTES_STRIP_METHODDEF
   2488     {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
   2489      _Py_swapcase__doc__},
   2490     {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
   2491     BYTES_TRANSLATE_METHODDEF
   2492     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
   2493     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
   2494     {NULL,     NULL}                         /* sentinel */
   2495 };
   2496 
   2497 static PyObject *
   2498 bytes_mod(PyObject *self, PyObject *arg)
   2499 {
   2500     if (!PyBytes_Check(self)) {
   2501         Py_RETURN_NOTIMPLEMENTED;
   2502     }
   2503     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
   2504                              arg, 0);
   2505 }
   2506 
   2507 static PyNumberMethods bytes_as_number = {
   2508     0,              /*nb_add*/
   2509     0,              /*nb_subtract*/
   2510     0,              /*nb_multiply*/
   2511     bytes_mod,      /*nb_remainder*/
   2512 };
   2513 
   2514 static PyObject *
   2515 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
   2516 
   2517 static PyObject *
   2518 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   2519 {
   2520     PyObject *x = NULL;
   2521     const char *encoding = NULL;
   2522     const char *errors = NULL;
   2523     PyObject *new = NULL;
   2524     PyObject *func;
   2525     Py_ssize_t size;
   2526     static char *kwlist[] = {"source", "encoding", "errors", 0};
   2527     _Py_IDENTIFIER(__bytes__);
   2528 
   2529     if (type != &PyBytes_Type)
   2530         return bytes_subtype_new(type, args, kwds);
   2531     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
   2532                                      &encoding, &errors))
   2533         return NULL;
   2534     if (x == NULL) {
   2535         if (encoding != NULL || errors != NULL) {
   2536             PyErr_SetString(PyExc_TypeError,
   2537                             "encoding or errors without sequence "
   2538                             "argument");
   2539             return NULL;
   2540         }
   2541         return PyBytes_FromStringAndSize(NULL, 0);
   2542     }
   2543 
   2544     if (encoding != NULL) {
   2545         /* Encode via the codec registry */
   2546         if (!PyUnicode_Check(x)) {
   2547             PyErr_SetString(PyExc_TypeError,
   2548                             "encoding without a string argument");
   2549             return NULL;
   2550         }
   2551         new = PyUnicode_AsEncodedString(x, encoding, errors);
   2552         if (new == NULL)
   2553             return NULL;
   2554         assert(PyBytes_Check(new));
   2555         return new;
   2556     }
   2557 
   2558     if (errors != NULL) {
   2559         PyErr_SetString(PyExc_TypeError,
   2560                         PyUnicode_Check(x) ?
   2561                         "string argument without an encoding" :
   2562                         "errors without a string argument");
   2563         return NULL;
   2564     }
   2565 
   2566     /* We'd like to call PyObject_Bytes here, but we need to check for an
   2567        integer argument before deferring to PyBytes_FromObject, something
   2568        PyObject_Bytes doesn't do. */
   2569     func = _PyObject_LookupSpecial(x, &PyId___bytes__);
   2570     if (func != NULL) {
   2571         new = PyObject_CallFunctionObjArgs(func, NULL);
   2572         Py_DECREF(func);
   2573         if (new == NULL)
   2574             return NULL;
   2575         if (!PyBytes_Check(new)) {
   2576             PyErr_Format(PyExc_TypeError,
   2577                          "__bytes__ returned non-bytes (type %.200s)",
   2578                          Py_TYPE(new)->tp_name);
   2579             Py_DECREF(new);
   2580             return NULL;
   2581         }
   2582         return new;
   2583     }
   2584     else if (PyErr_Occurred())
   2585         return NULL;
   2586 
   2587     if (PyUnicode_Check(x)) {
   2588         PyErr_SetString(PyExc_TypeError,
   2589                         "string argument without an encoding");
   2590         return NULL;
   2591     }
   2592     /* Is it an integer? */
   2593     if (PyIndex_Check(x)) {
   2594         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
   2595         if (size == -1 && PyErr_Occurred()) {
   2596             if (PyErr_ExceptionMatches(PyExc_OverflowError))
   2597                 return NULL;
   2598             PyErr_Clear();  /* fall through */
   2599         }
   2600         else {
   2601             if (size < 0) {
   2602                 PyErr_SetString(PyExc_ValueError, "negative count");
   2603                 return NULL;
   2604             }
   2605             new = _PyBytes_FromSize(size, 1);
   2606             if (new == NULL)
   2607                 return NULL;
   2608             return new;
   2609         }
   2610     }
   2611 
   2612     return PyBytes_FromObject(x);
   2613 }
   2614 
   2615 static PyObject*
   2616 _PyBytes_FromBuffer(PyObject *x)
   2617 {
   2618     PyObject *new;
   2619     Py_buffer view;
   2620 
   2621     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
   2622         return NULL;
   2623 
   2624     new = PyBytes_FromStringAndSize(NULL, view.len);
   2625     if (!new)
   2626         goto fail;
   2627     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
   2628                 &view, view.len, 'C') < 0)
   2629         goto fail;
   2630     PyBuffer_Release(&view);
   2631     return new;
   2632 
   2633 fail:
   2634     Py_XDECREF(new);
   2635     PyBuffer_Release(&view);
   2636     return NULL;
   2637 }
   2638 
   /* Shared function body for _PyBytes_FromList() and _PyBytes_FromTuple().

      x        - the container; Py_SIZE(x) is its current length.
      GET_ITEM - macro returning a borrowed reference to element i of x.

      The expansion is a complete function body: it returns a new bytes object
      on success, or NULL with an exception set.  Each element is converted with
      PyNumber_AsSsize_t() and must lie in range(0, 256), otherwise ValueError
      is raised.

      PyNumber_AsSsize_t() may call an arbitrary __index__ method, which can
      mutate x.  Therefore:
        - a strong reference to the item is held for the duration of the call,
          since the borrowed reference may be dropped by the container;
        - the write position is checked against the size of the result buffer,
          which is grown if the container grew, so no write lands outside it;
        - the result is trimmed at the end if the container shrank, so no
          uninitialised bytes are returned.
      _PyBytes_Resize() releases the bytes object itself on failure, hence the
      direct "return NULL" after it. */
   #define _PyBytes_FROM_LIST_BODY(x, GET_ITEM)                                \
       do {                                                                    \
           PyObject *bytes;                                                    \
           PyObject *item;                                                     \
           Py_ssize_t allocated = Py_SIZE(x);                                  \
           Py_ssize_t i;                                                       \
           Py_ssize_t value;                                                   \
                                                                               \
           bytes = PyBytes_FromStringAndSize(NULL, allocated);                 \
           if (bytes == NULL)                                                  \
               return NULL;                                                    \
                                                                               \
           for (i = 0; i < Py_SIZE(x); i++) {                                  \
               item = GET_ITEM((x), i);                                        \
               Py_INCREF(item);                                                \
               value = PyNumber_AsSsize_t(item, NULL);                         \
               Py_DECREF(item);                                                \
               if (value == -1 && PyErr_Occurred())                            \
                   goto error;                                                 \
                                                                               \
               if (value < 0 || value >= 256) {                                \
                   PyErr_SetString(PyExc_ValueError,                           \
                                   "bytes must be in range(0, 256)");          \
                   goto error;                                                 \
               }                                                               \
               if (i >= allocated) {                                           \
                   allocated = Py_SIZE(x) > i ? Py_SIZE(x) : i + 1;            \
                   if (_PyBytes_Resize(&bytes, allocated) < 0)                 \
                       return NULL;                                            \
               }                                                               \
               PyBytes_AS_STRING(bytes)[i] = (char) value;                     \
           }                                                                   \
           if (i != allocated && _PyBytes_Resize(&bytes, i) < 0)               \
               return NULL;                                                    \
           return bytes;                                                       \
                                                                               \
       error:                                                                  \
           Py_DECREF(bytes);                                                   \
           return NULL;                                                        \
       } while (0)
   2671 
   /* Build a bytes object from the items of x, read with PyList_GET_ITEM.
      The whole body, including every return statement, comes from the
      _PyBytes_FROM_LIST_BODY expansion.  PyBytes_FromObject() calls this
      only after PyList_CheckExact(x) succeeded. */
   2672 static PyObject*
   2673 _PyBytes_FromList(PyObject *x)
   2674 {
   2675     _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
   2676 }
   2677 
   /* Build a bytes object from the items of x, read with PyTuple_GET_ITEM.
      The whole body, including every return statement, comes from the
      _PyBytes_FROM_LIST_BODY expansion.  PyBytes_FromObject() calls this
      only after PyTuple_CheckExact(x) succeeded. */
   2678 static PyObject*
   2679 _PyBytes_FromTuple(PyObject *x)
   2680 {
   2681     _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
   2682 }
   2683 
   2684 static PyObject *
   2685 _PyBytes_FromIterator(PyObject *it, PyObject *x)
   2686 {
   2687     char *str;
   2688     Py_ssize_t i, size;
   2689     _PyBytesWriter writer;
   2690 
   2691     /* For iterator version, create a string object and resize as needed */
   2692     size = PyObject_LengthHint(x, 64);
   2693     if (size == -1 && PyErr_Occurred())
   2694         return NULL;
   2695 
   2696     _PyBytesWriter_Init(&writer);
   2697     str = _PyBytesWriter_Alloc(&writer, size);
   2698     if (str == NULL)
   2699         return NULL;
   2700     writer.overallocate = 1;
   2701     size = writer.allocated;
   2702 
   2703     /* Run the iterator to exhaustion */
   2704     for (i = 0; ; i++) {
   2705         PyObject *item;
   2706         Py_ssize_t value;
   2707 
   2708         /* Get the next item */
   2709         item = PyIter_Next(it);
   2710         if (item == NULL) {
   2711             if (PyErr_Occurred())
   2712                 goto error;
   2713             break;
   2714         }
   2715 
   2716         /* Interpret it as an int (__index__) */
   2717         value = PyNumber_AsSsize_t(item, NULL);
   2718         Py_DECREF(item);
   2719         if (value == -1 && PyErr_Occurred())
   2720             goto error;
   2721 
   2722         /* Range check */
   2723         if (value < 0 || value >= 256) {
   2724             PyErr_SetString(PyExc_ValueError,
   2725                             "bytes must be in range(0, 256)");
   2726             goto error;
   2727         }
   2728 
   2729         /* Append the byte */
   2730         if (i >= size) {
   2731             str = _PyBytesWriter_Resize(&writer, str, size+1);
   2732             if (str == NULL)
   2733                 return NULL;
   2734             size = writer.allocated;
   2735         }
   2736         *str++ = (char) value;
   2737     }
   2738 
   2739     return _PyBytesWriter_Finish(&writer, str);
   2740 
   2741   error:
   2742     _PyBytesWriter_Dealloc(&writer);
   2743     return NULL;
   2744 }
   2745 
   2746 PyObject *
   2747 PyBytes_FromObject(PyObject *x)
   2748 {
   2749     PyObject *it, *result;
   2750 
   2751     if (x == NULL) {
   2752         PyErr_BadInternalCall();
   2753         return NULL;
   2754     }
   2755 
   2756     if (PyBytes_CheckExact(x)) {
   2757         Py_INCREF(x);
   2758         return x;
   2759     }
   2760 
   2761     /* Use the modern buffer interface */
   2762     if (PyObject_CheckBuffer(x))
   2763         return _PyBytes_FromBuffer(x);
   2764 
   2765     if (PyList_CheckExact(x))
   2766         return _PyBytes_FromList(x);
   2767 
   2768     if (PyTuple_CheckExact(x))
   2769         return _PyBytes_FromTuple(x);
   2770 
   2771     if (!PyUnicode_Check(x)) {
   2772         it = PyObject_GetIter(x);
   2773         if (it != NULL) {
   2774             result = _PyBytes_FromIterator(it, x);
   2775             Py_DECREF(it);
   2776             return result;
   2777         }
   2778     }
   2779 
   2780     PyErr_Format(PyExc_TypeError,
   2781                  "cannot convert '%.200s' object to bytes",
   2782                  x->ob_type->tp_name);
   2783     return NULL;
   2784 }
   2785 
   2786 static PyObject *
   2787 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   2788 {
   2789     PyObject *tmp, *pnew;
   2790     Py_ssize_t n;
   2791 
   2792     assert(PyType_IsSubtype(type, &PyBytes_Type));
   2793     tmp = bytes_new(&PyBytes_Type, args, kwds);
   2794     if (tmp == NULL)
   2795         return NULL;
   2796     assert(PyBytes_Check(tmp));
   2797     n = PyBytes_GET_SIZE(tmp);
   2798     pnew = type->tp_alloc(type, n);
   2799     if (pnew != NULL) {
   2800         memcpy(PyBytes_AS_STRING(pnew),
   2801                   PyBytes_AS_STRING(tmp), n+1);
   2802         ((PyBytesObject *)pnew)->ob_shash =
   2803             ((PyBytesObject *)tmp)->ob_shash;
   2804     }
   2805     Py_DECREF(tmp);
   2806     return pnew;
   2807 }
   2808 
   2809 PyDoc_STRVAR(bytes_doc,
   2810 "bytes(iterable_of_ints) -> bytes\n\
   2811 bytes(string, encoding[, errors]) -> bytes\n\
   2812 bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
   2813 bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
   2814 bytes() -> empty bytes object\n\
   2815 \n\
   2816 Construct an immutable array of bytes from:\n\
   2817   - an iterable yielding integers in range(256)\n\
   2818   - a text string encoded using the specified encoding\n\
   2819   - any object implementing the buffer API.\n\
   2820   - an integer");
   2821 
   /* bytes_iter() is defined with the bytes iterator code further down; it is
      declared here because the tp_iter slot below refers to it. */
   2822 static PyObject *bytes_iter(PyObject *seq);
   2823
   /* Type object of bytes.  The initializer is positional: every entry must
      stay in slot order.  Slots set to 0 are left unset here. */
   2824 PyTypeObject PyBytes_Type = {
   2825     PyVarObject_HEAD_INIT(&PyType_Type, 0)
   2826     "bytes",                                    /* tp_name */
   2827     PyBytesObject_SIZE,                         /* tp_basicsize */
   2828     sizeof(char),                               /* tp_itemsize */
   2829     bytes_dealloc,                      /* tp_dealloc */
   2830     0,                                          /* tp_print */
   2831     0,                                          /* tp_getattr */
   2832     0,                                          /* tp_setattr */
   2833     0,                                          /* tp_reserved */
   2834     (reprfunc)bytes_repr,                       /* tp_repr */
   2835     &bytes_as_number,                           /* tp_as_number */
   2836     &bytes_as_sequence,                         /* tp_as_sequence */
   2837     &bytes_as_mapping,                          /* tp_as_mapping */
   2838     (hashfunc)bytes_hash,                       /* tp_hash */
   2839     0,                                          /* tp_call */
   2840     bytes_str,                                  /* tp_str */
   2841     PyObject_GenericGetAttr,                    /* tp_getattro */
   2842     0,                                          /* tp_setattro */
   2843     &bytes_as_buffer,                           /* tp_as_buffer */
   2844     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
   2845         Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
   2846     bytes_doc,                                  /* tp_doc */
   2847     0,                                          /* tp_traverse */
   2848     0,                                          /* tp_clear */
   2849     (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
   2850     0,                                          /* tp_weaklistoffset */
   2851     bytes_iter,                                 /* tp_iter */
   2852     0,                                          /* tp_iternext */
   2853     bytes_methods,                              /* tp_methods */
   2854     0,                                          /* tp_members */
   2855     0,                                          /* tp_getset */
   2856     &PyBaseObject_Type,                         /* tp_base */
   2857     0,                                          /* tp_dict */
   2858     0,                                          /* tp_descr_get */
   2859     0,                                          /* tp_descr_set */
   2860     0,                                          /* tp_dictoffset */
   2861     0,                                          /* tp_init */
   2862     0,                                          /* tp_alloc */
   2863     bytes_new,                                  /* tp_new */
   2864     PyObject_Del,                               /* tp_free */
   2865 };
   2866 
   2867 void
   2868 PyBytes_Concat(PyObject **pv, PyObject *w)
   2869 {
   2870     assert(pv != NULL);
   2871     if (*pv == NULL)
   2872         return;
   2873     if (w == NULL) {
   2874         Py_CLEAR(*pv);
   2875         return;
   2876     }
   2877 
   2878     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
   2879         /* Only one reference, so we can resize in place */
   2880         Py_ssize_t oldsize;
   2881         Py_buffer wb;
   2882 
   2883         wb.len = -1;
   2884         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
   2885             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
   2886                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
   2887             Py_CLEAR(*pv);
   2888             return;
   2889         }
   2890 
   2891         oldsize = PyBytes_GET_SIZE(*pv);
   2892         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
   2893             PyErr_NoMemory();
   2894             goto error;
   2895         }
   2896         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
   2897             goto error;
   2898 
   2899         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
   2900         PyBuffer_Release(&wb);
   2901         return;
   2902 
   2903       error:
   2904         PyBuffer_Release(&wb);
   2905         Py_CLEAR(*pv);
   2906         return;
   2907     }
   2908 
   2909     else {
   2910         /* Multiple references, need to create new object */
   2911         PyObject *v;
   2912         v = bytes_concat(*pv, w);
   2913         Py_SETREF(*pv, v);
   2914     }
   2915 }
   2916 
   /* Same as PyBytes_Concat(pv, w), but additionally releases the caller's
      reference to w afterwards.  w may be NULL (Py_XDECREF is used). */
   2917 void
   2918 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
   2919 {
   2920     PyBytes_Concat(pv, w);
   2921     Py_XDECREF(w);
   2922 }
   2923 
   2924 
   2925 /* The following function breaks the notion that bytes are immutable:
   2926    it changes the size of a bytes object.  We get away with this only if there
   2927    is only one module referencing the object.  You can also think of it
   2928    as creating a new bytes object and destroying the old one, only
   2929    more efficiently.  In any case, don't use this if the bytes object may
   2930    already be known to some other part of the code...
   2931    Note that if there's not enough memory to resize the bytes object, the
   2932    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
   2933    memory" exception is set, and -1 is returned.  Else (on success) 0 is
   2934    returned, and the value in *pv may or may not be the same as on input.
   2935    As always, an extra byte is allocated for a trailing \0 byte (newsize
   2936    does *not* include that), and a trailing \0 byte is stored.
   2937 */
   2938 
   2939 int
   2940 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
   2941 {
   2942     PyObject *v;
   2943     PyBytesObject *sv;
   2944     v = *pv;
            /* A non-bytes object or a negative size is a caller bug; it is
               reported through the "error" path below. */
   2945     if (!PyBytes_Check(v) || newsize < 0) {
   2946         goto error;
   2947     }
   2948     if (Py_SIZE(v) == newsize) {
   2949         /* return early if newsize equals to v->ob_size */
   2950         return 0;
   2951     }
            /* Resizing is refused unless this is the only reference. */
   2952     if (Py_REFCNT(v) != 1) {
   2953         goto error;
   2954     }
   2955     /* XXX UNREF/NEWREF interface should be more symmetrical */
            /* NOTE(review): these two presumably take v out of the debug-build
               reference bookkeeping before the block may move; the result is
               registered again with _Py_NewReference() below - confirm. */
   2956     _Py_DEC_REFTOTAL;
   2957     _Py_ForgetReference(v);
   2958     *pv = (PyObject *)
   2959         PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
   2960     if (*pv == NULL) {
                /* Reallocation failed: the old block v is freed here and *pv
                   is left NULL. */
   2961         PyObject_Del(v);
   2962         PyErr_NoMemory();
   2963         return -1;
   2964     }
   2965     _Py_NewReference(*pv);
   2966     sv = (PyBytesObject *) *pv;
   2967     Py_SIZE(sv) = newsize;
            /* Store the trailing null byte just past the new payload. */
   2968     sv->ob_sval[newsize] = '\0';
   2969     sv->ob_shash = -1;          /* invalidate cached hash value */
   2970     return 0;
   2971 error:
            /* Misuse: clear *pv, release the reference to v, and raise the
               "bad internal call" error. */
   2972     *pv = 0;
   2973     Py_DECREF(v);
   2974     PyErr_BadInternalCall();
   2975     return -1;
   2976 }
   2977 
   2978 void
   2979 PyBytes_Fini(void)
   2980 {
   2981     int i;
   2982     for (i = 0; i < UCHAR_MAX + 1; i++)
   2983         Py_CLEAR(characters[i]);
   2984     Py_CLEAR(nullstring);
   2985 }
   2986 
   2987 /*********************** Bytes Iterator ****************************/
   2988 
   /* State of a bytes iterator (instances of PyBytesIter_Type). */
   2989 typedef struct {
   2990     PyObject_HEAD
   2991     Py_ssize_t it_index;    /* index in it_seq of the next byte to return */
   2992     PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
   2993 } striterobject;
   2994 
   /* tp_dealloc of the bytes iterator: untrack it, release its reference to
      the underlying bytes object (it_seq may already be NULL, hence
      Py_XDECREF), then free the iterator with PyObject_GC_Del(). */
   2995 static void
   2996 striter_dealloc(striterobject *it)
   2997 {
   2998     _PyObject_GC_UNTRACK(it);
   2999     Py_XDECREF(it->it_seq);
   3000     PyObject_GC_Del(it);
   3001 }
   3002 
   /* tp_traverse of the bytes iterator: the only object it references is
      it_seq.  Always returns 0 unless Py_VISIT returns early. */
   3003 static int
   3004 striter_traverse(striterobject *it, visitproc visit, void *arg)
   3005 {
   3006     Py_VISIT(it->it_seq);
   3007     return 0;
   3008 }
   3009 
   3010 static PyObject *
   3011 striter_next(striterobject *it)
   3012 {
   3013     PyBytesObject *seq;
   3014     PyObject *item;
   3015 
   3016     assert(it != NULL);
   3017     seq = it->it_seq;
   3018     if (seq == NULL)
   3019         return NULL;
   3020     assert(PyBytes_Check(seq));
   3021 
   3022     if (it->it_index < PyBytes_GET_SIZE(seq)) {
   3023         item = PyLong_FromLong(
   3024             (unsigned char)seq->ob_sval[it->it_index]);
   3025         if (item != NULL)
   3026             ++it->it_index;
   3027         return item;
   3028     }
   3029 
   3030     it->it_seq = NULL;
   3031     Py_DECREF(seq);
   3032     return NULL;
   3033 }
   3034 
   3035 static PyObject *
   3036 striter_len(striterobject *it)
   3037 {
   3038     Py_ssize_t len = 0;
   3039     if (it->it_seq)
   3040         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
   3041     return PyLong_FromSsize_t(len);
   3042 }
   3043 
   3044 PyDoc_STRVAR(length_hint_doc,
   3045              "Private method returning an estimate of len(list(it)).");
   3046 
   3047 static PyObject *
   3048 striter_reduce(striterobject *it)
   3049 {
   3050     if (it->it_seq != NULL) {
   3051         return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
   3052                              it->it_seq, it->it_index);
   3053     } else {
   3054         PyObject *u = PyUnicode_FromUnicode(NULL, 0);
   3055         if (u == NULL)
   3056             return NULL;
   3057         return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
   3058     }
   3059 }
   3060 
   3061 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
   3062 
   3063 static PyObject *
   3064 striter_setstate(striterobject *it, PyObject *state)
   3065 {
   3066     Py_ssize_t index = PyLong_AsSsize_t(state);
   3067     if (index == -1 && PyErr_Occurred())
   3068         return NULL;
   3069     if (it->it_seq != NULL) {
   3070         if (index < 0)
   3071             index = 0;
   3072         else if (index > PyBytes_GET_SIZE(it->it_seq))
   3073             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
   3074         it->it_index = index;
   3075     }
   3076     Py_RETURN_NONE;
   3077 }
   3078 
   3079 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
   3080 
   /* Method table of the bytes iterator (tp_methods of PyBytesIter_Type).
      __length_hint__ and __reduce__ take no argument; __setstate__ takes
      exactly one (METH_O). */
   3081 static PyMethodDef striter_methods[] = {
   3082     {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
   3083      length_hint_doc},
   3084     {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
   3085      reduce_doc},
   3086     {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
   3087      setstate_doc},
   3088     {NULL,              NULL}           /* sentinel */
   3089 };
   3090 
   /* Type object of the bytes iterator created by bytes_iter().  The
      initializer is positional: every entry must stay in slot order. */
   3091 PyTypeObject PyBytesIter_Type = {
   3092     PyVarObject_HEAD_INIT(&PyType_Type, 0)
   3093     "bytes_iterator",                           /* tp_name */
   3094     sizeof(striterobject),                      /* tp_basicsize */
   3095     0,                                          /* tp_itemsize */
   3096     /* methods */
   3097     (destructor)striter_dealloc,                /* tp_dealloc */
   3098     0,                                          /* tp_print */
   3099     0,                                          /* tp_getattr */
   3100     0,                                          /* tp_setattr */
   3101     0,                                          /* tp_reserved */
   3102     0,                                          /* tp_repr */
   3103     0,                                          /* tp_as_number */
   3104     0,                                          /* tp_as_sequence */
   3105     0,                                          /* tp_as_mapping */
   3106     0,                                          /* tp_hash */
   3107     0,                                          /* tp_call */
   3108     0,                                          /* tp_str */
   3109     PyObject_GenericGetAttr,                    /* tp_getattro */
   3110     0,                                          /* tp_setattro */
   3111     0,                                          /* tp_as_buffer */
   3112     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
   3113     0,                                          /* tp_doc */
   3114     (traverseproc)striter_traverse,     /* tp_traverse */
   3115     0,                                          /* tp_clear */
   3116     0,                                          /* tp_richcompare */
   3117     0,                                          /* tp_weaklistoffset */
   3118     PyObject_SelfIter,                          /* tp_iter */
   3119     (iternextfunc)striter_next,                 /* tp_iternext */
   3120     striter_methods,                            /* tp_methods */
   3121     0,                                          /* tp_members */
   3122 };
   3123 
   3124 static PyObject *
   3125 bytes_iter(PyObject *seq)
   3126 {
   3127     striterobject *it;
   3128 
   3129     if (!PyBytes_Check(seq)) {
   3130         PyErr_BadInternalCall();
   3131         return NULL;
   3132     }
   3133     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
   3134     if (it == NULL)
   3135         return NULL;
   3136     it->it_index = 0;
   3137     Py_INCREF(seq);
   3138     it->it_seq = (PyBytesObject *)seq;
   3139     _PyObject_GC_TRACK(it);
   3140     return (PyObject *)it;
   3141 }
   3142 
   3143 
   3144 /* _PyBytesWriter API */
   3145 
   /* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
      _PyBytesWriter_Resize() adds size / OVERALLOCATE_FACTOR extra bytes to
      the requested size (2 -> +50%, 4 -> +25%). */
   3146 #ifdef MS_WINDOWS
   3147    /* On Windows, overallocate by 50% is the best factor */
   3148 #  define OVERALLOCATE_FACTOR 2
   3149 #else
   3150    /* On Linux, overallocate by 25% is the best factor */
   3151 #  define OVERALLOCATE_FACTOR 4
   3152 #endif
   3153 
   /* Put a writer into its initial state.  Only the fields located before
      small_buffer are zeroed; small_buffer itself is left as is, except in
      Py_DEBUG builds where it is filled with the byte 0xCB. */
   3154 void
   3155 _PyBytesWriter_Init(_PyBytesWriter *writer)
   3156 {
   3157     /* Set all attributes before small_buffer to 0 */
   3158     memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
   3159 #ifdef Py_DEBUG
   3160     memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
   3161 #endif
   3162 }
   3163 
   /* Release the writer's heap buffer, if any, and set writer->buffer to NULL
      (Py_CLEAR).  The other fields of the writer are not modified. */
   3164 void
   3165 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
   3166 {
   3167     Py_CLEAR(writer->buffer);
   3168 }
   3169 
   3170 Py_LOCAL_INLINE(char*)
   3171 _PyBytesWriter_AsString(_PyBytesWriter *writer)
   3172 {
   3173     if (writer->use_small_buffer) {
   3174         assert(writer->buffer == NULL);
   3175         return writer->small_buffer;
   3176     }
   3177     else if (writer->use_bytearray) {
   3178         assert(writer->buffer != NULL);
   3179         return PyByteArray_AS_STRING(writer->buffer);
   3180     }
   3181     else {
   3182         assert(writer->buffer != NULL);
   3183         return PyBytes_AS_STRING(writer->buffer);
   3184     }
   3185 }
   3186 
   3187 Py_LOCAL_INLINE(Py_ssize_t)
   3188 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
   3189 {
   3190     char *start = _PyBytesWriter_AsString(writer);
   3191     assert(str != NULL);
   3192     assert(str >= start);
   3193     assert(str - start <= writer->allocated);
   3194     return str - start;
   3195 }
   3196 
   /* Debug-only sanity checks on a writer and a write position str.
      Compiles to an empty function unless Py_DEBUG is defined; every check is
      an assert(). */
   3197 Py_LOCAL_INLINE(void)
   3198 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
   3199 {
   3200 #ifdef Py_DEBUG
   3201     char *start, *end;
   3202
            /* Exactly one storage is in use: the small buffer (then there is
               no heap object) or an exact bytes/bytearray object to which the
               writer holds the only reference. */
   3203     if (writer->use_small_buffer) {
   3204         assert(writer->buffer == NULL);
   3205     }
   3206     else {
   3207         assert(writer->buffer != NULL);
   3208         if (writer->use_bytearray)
   3209             assert(PyByteArray_CheckExact(writer->buffer));
   3210         else
   3211             assert(PyBytes_CheckExact(writer->buffer));
   3212         assert(Py_REFCNT(writer->buffer) == 1);
   3213     }
   3214
   3215     if (writer->use_bytearray) {
   3216         /* bytearray has its own overallocation algorithm,
   3217            writer overallocation must be disabled */
   3218         assert(!writer->overallocate);
   3219     }
   3220
   3221     assert(0 <= writer->allocated);
   3222     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
   3223     /* the last byte must always be null */
   3224     start = _PyBytesWriter_AsString(writer);
   3225     assert(start[writer->allocated] == 0);
   3226
            /* The write position must lie inside [start, start + allocated]. */
   3227     end = start + writer->allocated;
   3228     assert(str != NULL);
   3229     assert(start <= str && str <= end);
   3230 #endif
   3231 }
   3232 
   /* Grow the writer's storage so that it can hold at least `size` bytes.

      `str` is the current write position and `size` must be larger than
      writer->allocated.  Returns the write position at the same offset inside
      the (possibly relocated) storage.  On failure the writer's buffer is
      released with _PyBytesWriter_Dealloc() and NULL is returned. */
   3233 void*
   3234 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
   3235 {
   3236     Py_ssize_t allocated, pos;
   3237
   3238     _PyBytesWriter_CheckConsistency(writer, str);
   3239     assert(writer->allocated < size);
   3240
   3241     allocated = size;
            /* The extra room is skipped when adding it would exceed
               PY_SSIZE_T_MAX. */
   3242     if (writer->overallocate
   3243         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
   3244         /* overallocate to limit the number of realloc() */
   3245         allocated += allocated / OVERALLOCATE_FACTOR;
   3246     }
   3247
            /* Remember the offset now: the storage may move below. */
   3248     pos = _PyBytesWriter_GetSize(writer, str);
   3249     if (!writer->use_small_buffer) {
                /* Already on the heap: resize the existing object. */
   3250         if (writer->use_bytearray) {
   3251             if (PyByteArray_Resize(writer->buffer, allocated))
   3252                 goto error;
   3253             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
   3254                but we cannot use ob_alloc because bytes may need to be moved
   3255                to use the whole buffer. bytearray uses an internal optimization
   3256                to avoid moving or copying bytes when bytes are removed at the
   3257                beginning (ex: del bytearray[:1]). */
   3258         }
   3259         else {
   3260             if (_PyBytes_Resize(&writer->buffer, allocated))
   3261                 goto error;
   3262         }
   3263     }
   3264     else {
   3265         /* convert from stack buffer to bytes object buffer */
   3266         assert(writer->buffer == NULL);
   3267
   3268         if (writer->use_bytearray)
   3269             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
   3270         else
   3271             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
   3272         if (writer->buffer == NULL)
   3273             goto error;
   3274
                /* Carry over the bytes already written to the small buffer. */
   3275         if (pos != 0) {
   3276             char *dest;
   3277             if (writer->use_bytearray)
   3278                 dest = PyByteArray_AS_STRING(writer->buffer);
   3279             else
   3280                 dest = PyBytes_AS_STRING(writer->buffer);
   3281             memcpy(dest,
   3282                       writer->small_buffer,
   3283                       pos);
   3284         }
   3285
   3286         writer->use_small_buffer = 0;
   3287 #ifdef Py_DEBUG
                /* Overwrite the abandoned small buffer with the byte 0xDB. */
   3288         memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
   3289 #endif
   3290     }
   3291     writer->allocated = allocated;
   3292
            /* Recompute the write position relative to the new storage. */
   3293     str = _PyBytesWriter_AsString(writer) + pos;
   3294     _PyBytesWriter_CheckConsistency(writer, str);
   3295     return str;
   3296
   3297 error:
   3298     _PyBytesWriter_Dealloc(writer);
   3299     return NULL;
   3300 }
   3301 
   3302 void*
   3303 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
   3304 {
   3305     Py_ssize_t new_min_size;
   3306 
   3307     _PyBytesWriter_CheckConsistency(writer, str);
   3308     assert(size >= 0);
   3309 
   3310     if (size == 0) {
   3311         /* nothing to do */
   3312         return str;
   3313     }
   3314 
   3315     if (writer->min_size > PY_SSIZE_T_MAX - size) {
   3316         PyErr_NoMemory();
   3317         _PyBytesWriter_Dealloc(writer);
   3318         return NULL;
   3319     }
   3320     new_min_size = writer->min_size + size;
   3321 
   3322     if (new_min_size > writer->allocated)
   3323         str = _PyBytesWriter_Resize(writer, str, new_min_size);
   3324 
   3325     writer->min_size = new_min_size;
   3326     return str;
   3327 }
   3328 
   3329 /* Allocate the buffer to write size bytes.
   3330    Return the pointer to the beginning of buffer data.
   3331    Raise an exception and return NULL on error. */
   3332 void*
   3333 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
   3334 {
   3335     /* ensure that _PyBytesWriter_Alloc() is only called once */
   3336     assert(writer->min_size == 0 && writer->buffer == NULL);
   3337     assert(size >= 0);
   3338 
   3339     writer->use_small_buffer = 1;
   3340 #ifdef Py_DEBUG
   3341     writer->allocated = sizeof(writer->small_buffer) - 1;
   3342     /* In debug mode, don't use the full small buffer because it is less
   3343        efficient than bytes and bytearray objects to detect buffer underflow
   3344        and buffer overflow. Use 10 bytes of the small buffer to test also
   3345        code using the smaller buffer in debug mode.
   3346 
   3347        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
   3348        in debug mode to also be able to detect stack overflow when running
   3349        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
   3350        if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
   3351        stack overflow. */
   3352     writer->allocated = Py_MIN(writer->allocated, 10);
   3353     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
   3354        to detect buffer overflow */
   3355     writer->small_buffer[writer->allocated] = 0;
   3356 #else
   3357     writer->allocated = sizeof(writer->small_buffer);
   3358 #endif
   3359     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
   3360 }
   3361 
   3362 PyObject *
   3363 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
   3364 {
   3365     Py_ssize_t size;
   3366     PyObject *result;
   3367 
   3368     _PyBytesWriter_CheckConsistency(writer, str);
   3369 
   3370     size = _PyBytesWriter_GetSize(writer, str);
   3371     if (size == 0 && !writer->use_bytearray) {
   3372         Py_CLEAR(writer->buffer);
   3373         /* Get the empty byte string singleton */
   3374         result = PyBytes_FromStringAndSize(NULL, 0);
   3375     }
   3376     else if (writer->use_small_buffer) {
   3377         if (writer->use_bytearray) {
   3378             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
   3379         }
   3380         else {
   3381             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
   3382         }
   3383     }
   3384     else {
   3385         result = writer->buffer;
   3386         writer->buffer = NULL;
   3387 
   3388         if (size != writer->allocated) {
   3389             if (writer->use_bytearray) {
   3390                 if (PyByteArray_Resize(result, size)) {
   3391                     Py_DECREF(result);
   3392                     return NULL;
   3393                 }
   3394             }
   3395             else {
   3396                 if (_PyBytes_Resize(&result, size)) {
   3397                     assert(result == NULL);
   3398                     return NULL;
   3399                 }
   3400             }
   3401         }
   3402     }
   3403     return result;
   3404 }
   3405 
   3406 void*
   3407 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
   3408                           const void *bytes, Py_ssize_t size)
   3409 {
   3410     char *str = (char *)ptr;
   3411 
   3412     str = _PyBytesWriter_Prepare(writer, str, size);
   3413     if (str == NULL)
   3414         return NULL;
   3415 
   3416     memcpy(str, bytes, size);
   3417     str += size;
   3418 
   3419     return str;
   3420 }
   3421