Home | History | Annotate | Download | only in Objects
      1 /* String (str/bytes) object implementation */
      2 
      3 #define PY_SSIZE_T_CLEAN
      4 
      5 #include "Python.h"
      6 #include <ctype.h>
      7 #include <stddef.h>
      8 
      9 #ifdef COUNT_ALLOCS
     10 Py_ssize_t null_strings, one_strings;
     11 #endif
     12 
     13 static PyStringObject *characters[UCHAR_MAX + 1];
     14 static PyStringObject *nullstring;
     15 
     16 /* This dictionary holds all interned strings.  Note that references to
     17    strings in this dictionary are *not* counted in the string's ob_refcnt.
     18    When the interned string reaches a refcnt of 0 the string deallocation
     19    function will delete the reference from this dictionary.
     20 
     21    Another way to look at this is to say that the actual reference
     22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
     23 */
     24 static PyObject *interned;
     25 
     26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
     27    for a string of length n should request PyStringObject_SIZE + n bytes.
     28 
     29    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
     30    3 bytes per string allocation on a typical system.
     31 */
     32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
     33 
     34 /*
     35    For PyString_FromString(), the parameter `str' points to a null-terminated
     36    string containing exactly `size' bytes.
     37 
     38    For PyString_FromStringAndSize(), the parameter `str' is
     39    either NULL or else points to a string containing at least `size' bytes.
     40    For PyString_FromStringAndSize(), the string in the `str' parameter does
     41    not have to be null-terminated.  (Therefore it is safe to construct a
     42    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
     43    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
     44    bytes (setting the last byte to the null terminating character) and you can
     45    fill in the data yourself.  If `str' is non-NULL then the resulting
     46    PyString object must be treated as immutable and you must not fill in nor
     47    alter the data yourself, since the strings may be shared.
     48 
     49    The PyObject member `op->ob_size', which denotes the number of "extra
     50    items" in a variable-size object, will contain the number of bytes
     51    allocated for string data, not counting the null terminating character.
     52    It is therefore equal to the `size' parameter (for
     53    PyString_FromStringAndSize()) or the length of the string in the `str'
     54    parameter (for PyString_FromString()).
     55 */
     56 PyObject *
     57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
     58 {
     59     register PyStringObject *op;
     60     if (size < 0) {
     61         PyErr_SetString(PyExc_SystemError,
     62             "Negative size passed to PyString_FromStringAndSize");
     63         return NULL;
     64     }
     65     if (size == 0 && (op = nullstring) != NULL) {
     66 #ifdef COUNT_ALLOCS
     67         null_strings++;
     68 #endif
     69         Py_INCREF(op);
     70         return (PyObject *)op;
     71     }
     72     if (size == 1 && str != NULL &&
     73         (op = characters[*str & UCHAR_MAX]) != NULL)
     74     {
     75 #ifdef COUNT_ALLOCS
     76         one_strings++;
     77 #endif
     78         Py_INCREF(op);
     79         return (PyObject *)op;
     80     }
     81 
     82     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
     83         PyErr_SetString(PyExc_OverflowError, "string is too large");
     84         return NULL;
     85     }
     86 
     87     /* Inline PyObject_NewVar */
     88     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
     89     if (op == NULL)
     90         return PyErr_NoMemory();
     91     PyObject_INIT_VAR(op, &PyString_Type, size);
     92     op->ob_shash = -1;
     93     op->ob_sstate = SSTATE_NOT_INTERNED;
     94     if (str != NULL)
     95         Py_MEMCPY(op->ob_sval, str, size);
     96     op->ob_sval[size] = '\0';
     97     /* share short strings */
     98     if (size == 0) {
     99         PyObject *t = (PyObject *)op;
    100         PyString_InternInPlace(&t);
    101         op = (PyStringObject *)t;
    102         nullstring = op;
    103         Py_INCREF(op);
    104     } else if (size == 1 && str != NULL) {
    105         PyObject *t = (PyObject *)op;
    106         PyString_InternInPlace(&t);
    107         op = (PyStringObject *)t;
    108         characters[*str & UCHAR_MAX] = op;
    109         Py_INCREF(op);
    110     }
    111     return (PyObject *) op;
    112 }
    113 
    114 PyObject *
    115 PyString_FromString(const char *str)
    116 {
    117     register size_t size;
    118     register PyStringObject *op;
    119 
    120     assert(str != NULL);
    121     size = strlen(str);
    122     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
    123         PyErr_SetString(PyExc_OverflowError,
    124             "string is too long for a Python string");
    125         return NULL;
    126     }
    127     if (size == 0 && (op = nullstring) != NULL) {
    128 #ifdef COUNT_ALLOCS
    129         null_strings++;
    130 #endif
    131         Py_INCREF(op);
    132         return (PyObject *)op;
    133     }
    134     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
    135 #ifdef COUNT_ALLOCS
    136         one_strings++;
    137 #endif
    138         Py_INCREF(op);
    139         return (PyObject *)op;
    140     }
    141 
    142     /* Inline PyObject_NewVar */
    143     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
    144     if (op == NULL)
    145         return PyErr_NoMemory();
    146     PyObject_INIT_VAR(op, &PyString_Type, size);
    147     op->ob_shash = -1;
    148     op->ob_sstate = SSTATE_NOT_INTERNED;
    149     Py_MEMCPY(op->ob_sval, str, size+1);
    150     /* share short strings */
    151     if (size == 0) {
    152         PyObject *t = (PyObject *)op;
    153         PyString_InternInPlace(&t);
    154         op = (PyStringObject *)t;
    155         nullstring = op;
    156         Py_INCREF(op);
    157     } else if (size == 1) {
    158         PyObject *t = (PyObject *)op;
    159         PyString_InternInPlace(&t);
    160         op = (PyStringObject *)t;
    161         characters[*str & UCHAR_MAX] = op;
    162         Py_INCREF(op);
    163     }
    164     return (PyObject *) op;
    165 }
    166 
    167 PyObject *
    168 PyString_FromFormatV(const char *format, va_list vargs)
    169 {
    170     va_list count;
    171     Py_ssize_t n = 0;
    172     const char* f;
    173     char *s;
    174     PyObject* string;
    175 
    176 #ifdef VA_LIST_IS_ARRAY
    177     Py_MEMCPY(count, vargs, sizeof(va_list));
    178 #else
    179 #ifdef  __va_copy
    180     __va_copy(count, vargs);
    181 #else
    182     count = vargs;
    183 #endif
    184 #endif
    185     /* step 1: figure out how large a buffer we need */
    186     for (f = format; *f; f++) {
    187         if (*f == '%') {
    188 #ifdef HAVE_LONG_LONG
    189             int longlongflag = 0;
    190 #endif
    191             const char* p = f;
    192             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
    193                 ;
    194 
    195             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
    196              * they don't affect the amount of space we reserve.
    197              */
    198             if (*f == 'l') {
    199                 if (f[1] == 'd' || f[1] == 'u') {
    200                     ++f;
    201                 }
    202 #ifdef HAVE_LONG_LONG
    203                 else if (f[1] == 'l' &&
    204                          (f[2] == 'd' || f[2] == 'u')) {
    205                     longlongflag = 1;
    206                     f += 2;
    207                 }
    208 #endif
    209             }
    210             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
    211                 ++f;
    212             }
    213 
    214             switch (*f) {
    215             case 'c':
    216                 (void)va_arg(count, int);
    217                 /* fall through... */
    218             case '%':
    219                 n++;
    220                 break;
    221             case 'd': case 'u': case 'i': case 'x':
    222                 (void) va_arg(count, int);
    223 #ifdef HAVE_LONG_LONG
    224                 /* Need at most
    225                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
    226                    plus 1 for the sign.  53/22 is an upper
    227                    bound for log10(256). */
    228                 if (longlongflag)
    229                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
    230                 else
    231 #endif
    232                     /* 20 bytes is enough to hold a 64-bit
    233                        integer.  Decimal takes the most
    234                        space.  This isn't enough for
    235                        octal. */
    236                     n += 20;
    237 
    238                 break;
    239             case 's':
    240                 s = va_arg(count, char*);
    241                 n += strlen(s);
    242                 break;
    243             case 'p':
    244                 (void) va_arg(count, int);
    245                 /* maximum 64-bit pointer representation:
    246                  * 0xffffffffffffffff
    247                  * so 19 characters is enough.
    248                  * XXX I count 18 -- what's the extra for?
    249                  */
    250                 n += 19;
    251                 break;
    252             default:
    253                 /* if we stumble upon an unknown
    254                    formatting code, copy the rest of
    255                    the format string to the output
    256                    string. (we cannot just skip the
    257                    code, since there's no way to know
    258                    what's in the argument list) */
    259                 n += strlen(p);
    260                 goto expand;
    261             }
    262         } else
    263             n++;
    264     }
    265  expand:
    266     /* step 2: fill the buffer */
    267     /* Since we've analyzed how much space we need for the worst case,
    268        use sprintf directly instead of the slower PyOS_snprintf. */
    269     string = PyString_FromStringAndSize(NULL, n);
    270     if (!string)
    271         return NULL;
    272 
    273     s = PyString_AsString(string);
    274 
    275     for (f = format; *f; f++) {
    276         if (*f == '%') {
    277             const char* p = f++;
    278             Py_ssize_t i;
    279             int longflag = 0;
    280 #ifdef HAVE_LONG_LONG
    281             int longlongflag = 0;
    282 #endif
    283             int size_tflag = 0;
    284             /* parse the width.precision part (we're only
    285                interested in the precision value, if any) */
    286             n = 0;
    287             while (isdigit(Py_CHARMASK(*f)))
    288                 n = (n*10) + *f++ - '0';
    289             if (*f == '.') {
    290                 f++;
    291                 n = 0;
    292                 while (isdigit(Py_CHARMASK(*f)))
    293                     n = (n*10) + *f++ - '0';
    294             }
    295             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
    296                 f++;
    297             /* Handle %ld, %lu, %lld and %llu. */
    298             if (*f == 'l') {
    299                 if (f[1] == 'd' || f[1] == 'u') {
    300                     longflag = 1;
    301                     ++f;
    302                 }
    303 #ifdef HAVE_LONG_LONG
    304                 else if (f[1] == 'l' &&
    305                          (f[2] == 'd' || f[2] == 'u')) {
    306                     longlongflag = 1;
    307                     f += 2;
    308                 }
    309 #endif
    310             }
    311             /* handle the size_t flag. */
    312             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
    313                 size_tflag = 1;
    314                 ++f;
    315             }
    316 
    317             switch (*f) {
    318             case 'c':
    319                 *s++ = va_arg(vargs, int);
    320                 break;
    321             case 'd':
    322                 if (longflag)
    323                     sprintf(s, "%ld", va_arg(vargs, long));
    324 #ifdef HAVE_LONG_LONG
    325                 else if (longlongflag)
    326                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
    327                         va_arg(vargs, PY_LONG_LONG));
    328 #endif
    329                 else if (size_tflag)
    330                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
    331                         va_arg(vargs, Py_ssize_t));
    332                 else
    333                     sprintf(s, "%d", va_arg(vargs, int));
    334                 s += strlen(s);
    335                 break;
    336             case 'u':
    337                 if (longflag)
    338                     sprintf(s, "%lu",
    339                         va_arg(vargs, unsigned long));
    340 #ifdef HAVE_LONG_LONG
    341                 else if (longlongflag)
    342                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
    343                         va_arg(vargs, PY_LONG_LONG));
    344 #endif
    345                 else if (size_tflag)
    346                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
    347                         va_arg(vargs, size_t));
    348                 else
    349                     sprintf(s, "%u",
    350                         va_arg(vargs, unsigned int));
    351                 s += strlen(s);
    352                 break;
    353             case 'i':
    354                 sprintf(s, "%i", va_arg(vargs, int));
    355                 s += strlen(s);
    356                 break;
    357             case 'x':
    358                 sprintf(s, "%x", va_arg(vargs, int));
    359                 s += strlen(s);
    360                 break;
    361             case 's':
    362                 p = va_arg(vargs, char*);
    363                 i = strlen(p);
    364                 if (n > 0 && i > n)
    365                     i = n;
    366                 Py_MEMCPY(s, p, i);
    367                 s += i;
    368                 break;
    369             case 'p':
    370                 sprintf(s, "%p", va_arg(vargs, void*));
    371                 /* %p is ill-defined:  ensure leading 0x. */
    372                 if (s[1] == 'X')
    373                     s[1] = 'x';
    374                 else if (s[1] != 'x') {
    375                     memmove(s+2, s, strlen(s)+1);
    376                     s[0] = '0';
    377                     s[1] = 'x';
    378                 }
    379                 s += strlen(s);
    380                 break;
    381             case '%':
    382                 *s++ = '%';
    383                 break;
    384             default:
    385                 strcpy(s, p);
    386                 s += strlen(s);
    387                 goto end;
    388             }
    389         } else
    390             *s++ = *f;
    391     }
    392 
    393  end:
    394     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
    395         return NULL;
    396     return string;
    397 }
    398 
    399 PyObject *
    400 PyString_FromFormat(const char *format, ...)
    401 {
    402     PyObject* ret;
    403     va_list vargs;
    404 
    405 #ifdef HAVE_STDARG_PROTOTYPES
    406     va_start(vargs, format);
    407 #else
    408     va_start(vargs);
    409 #endif
    410     ret = PyString_FromFormatV(format, vargs);
    411     va_end(vargs);
    412     return ret;
    413 }
    414 
    415 
    416 PyObject *PyString_Decode(const char *s,
    417                           Py_ssize_t size,
    418                           const char *encoding,
    419                           const char *errors)
    420 {
    421     PyObject *v, *str;
    422 
    423     str = PyString_FromStringAndSize(s, size);
    424     if (str == NULL)
    425         return NULL;
    426     v = PyString_AsDecodedString(str, encoding, errors);
    427     Py_DECREF(str);
    428     return v;
    429 }
    430 
    431 PyObject *PyString_AsDecodedObject(PyObject *str,
    432                                    const char *encoding,
    433                                    const char *errors)
    434 {
    435     PyObject *v;
    436 
    437     if (!PyString_Check(str)) {
    438         PyErr_BadArgument();
    439         goto onError;
    440     }
    441 
    442     if (encoding == NULL) {
    443 #ifdef Py_USING_UNICODE
    444         encoding = PyUnicode_GetDefaultEncoding();
    445 #else
    446         PyErr_SetString(PyExc_ValueError, "no encoding specified");
    447         goto onError;
    448 #endif
    449     }
    450 
    451     /* Decode via the codec registry */
    452     v = PyCodec_Decode(str, encoding, errors);
    453     if (v == NULL)
    454         goto onError;
    455 
    456     return v;
    457 
    458  onError:
    459     return NULL;
    460 }
    461 
    462 PyObject *PyString_AsDecodedString(PyObject *str,
    463                                    const char *encoding,
    464                                    const char *errors)
    465 {
    466     PyObject *v;
    467 
    468     v = PyString_AsDecodedObject(str, encoding, errors);
    469     if (v == NULL)
    470         goto onError;
    471 
    472 #ifdef Py_USING_UNICODE
    473     /* Convert Unicode to a string using the default encoding */
    474     if (PyUnicode_Check(v)) {
    475         PyObject *temp = v;
    476         v = PyUnicode_AsEncodedString(v, NULL, NULL);
    477         Py_DECREF(temp);
    478         if (v == NULL)
    479             goto onError;
    480     }
    481 #endif
    482     if (!PyString_Check(v)) {
    483         PyErr_Format(PyExc_TypeError,
    484                      "decoder did not return a string object (type=%.400s)",
    485                      Py_TYPE(v)->tp_name);
    486         Py_DECREF(v);
    487         goto onError;
    488     }
    489 
    490     return v;
    491 
    492  onError:
    493     return NULL;
    494 }
    495 
    496 PyObject *PyString_Encode(const char *s,
    497                           Py_ssize_t size,
    498                           const char *encoding,
    499                           const char *errors)
    500 {
    501     PyObject *v, *str;
    502 
    503     str = PyString_FromStringAndSize(s, size);
    504     if (str == NULL)
    505         return NULL;
    506     v = PyString_AsEncodedString(str, encoding, errors);
    507     Py_DECREF(str);
    508     return v;
    509 }
    510 
    511 PyObject *PyString_AsEncodedObject(PyObject *str,
    512                                    const char *encoding,
    513                                    const char *errors)
    514 {
    515     PyObject *v;
    516 
    517     if (!PyString_Check(str)) {
    518         PyErr_BadArgument();
    519         goto onError;
    520     }
    521 
    522     if (encoding == NULL) {
    523 #ifdef Py_USING_UNICODE
    524         encoding = PyUnicode_GetDefaultEncoding();
    525 #else
    526         PyErr_SetString(PyExc_ValueError, "no encoding specified");
    527         goto onError;
    528 #endif
    529     }
    530 
    531     /* Encode via the codec registry */
    532     v = PyCodec_Encode(str, encoding, errors);
    533     if (v == NULL)
    534         goto onError;
    535 
    536     return v;
    537 
    538  onError:
    539     return NULL;
    540 }
    541 
    542 PyObject *PyString_AsEncodedString(PyObject *str,
    543                                    const char *encoding,
    544                                    const char *errors)
    545 {
    546     PyObject *v;
    547 
    548     v = PyString_AsEncodedObject(str, encoding, errors);
    549     if (v == NULL)
    550         goto onError;
    551 
    552 #ifdef Py_USING_UNICODE
    553     /* Convert Unicode to a string using the default encoding */
    554     if (PyUnicode_Check(v)) {
    555         PyObject *temp = v;
    556         v = PyUnicode_AsEncodedString(v, NULL, NULL);
    557         Py_DECREF(temp);
    558         if (v == NULL)
    559             goto onError;
    560     }
    561 #endif
    562     if (!PyString_Check(v)) {
    563         PyErr_Format(PyExc_TypeError,
    564                      "encoder did not return a string object (type=%.400s)",
    565                      Py_TYPE(v)->tp_name);
    566         Py_DECREF(v);
    567         goto onError;
    568     }
    569 
    570     return v;
    571 
    572  onError:
    573     return NULL;
    574 }
    575 
    576 static void
    577 string_dealloc(PyObject *op)
    578 {
    579     switch (PyString_CHECK_INTERNED(op)) {
    580         case SSTATE_NOT_INTERNED:
    581             break;
    582 
    583         case SSTATE_INTERNED_MORTAL:
    584             /* revive dead object temporarily for DelItem */
    585             Py_REFCNT(op) = 3;
    586             if (PyDict_DelItem(interned, op) != 0)
    587                 Py_FatalError(
    588                     "deletion of interned string failed");
    589             break;
    590 
    591         case SSTATE_INTERNED_IMMORTAL:
    592             Py_FatalError("Immortal interned string died.");
    593 
    594         default:
    595             Py_FatalError("Inconsistent interned string state.");
    596     }
    597     Py_TYPE(op)->tp_free(op);
    598 }
    599 
    600 /* Unescape a backslash-escaped string. If unicode is non-zero,
    601    the string is a u-literal. If recode_encoding is non-zero,
    602    the string is UTF-8 encoded and should be re-encoded in the
    603    specified encoding.  */
    604 
    605 PyObject *PyString_DecodeEscape(const char *s,
    606                                 Py_ssize_t len,
    607                                 const char *errors,
    608                                 Py_ssize_t unicode,
    609                                 const char *recode_encoding)
    610 {
    611     int c;
    612     char *p, *buf;
    613     const char *end;
    614     PyObject *v;
    615     Py_ssize_t newlen = recode_encoding ? 4*len:len;
    616     v = PyString_FromStringAndSize((char *)NULL, newlen);
    617     if (v == NULL)
    618         return NULL;
    619     p = buf = PyString_AsString(v);
    620     end = s + len;
    621     while (s < end) {
    622         if (*s != '\\') {
    623           non_esc:
    624 #ifdef Py_USING_UNICODE
    625             if (recode_encoding && (*s & 0x80)) {
    626                 PyObject *u, *w;
    627                 char *r;
    628                 const char* t;
    629                 Py_ssize_t rn;
    630                 t = s;
    631                 /* Decode non-ASCII bytes as UTF-8. */
    632                 while (t < end && (*t & 0x80)) t++;
    633                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
    634                 if(!u) goto failed;
    635 
    636                 /* Recode them in target encoding. */
    637                 w = PyUnicode_AsEncodedString(
    638                     u, recode_encoding, errors);
    639                 Py_DECREF(u);
    640                 if (!w)                 goto failed;
    641 
    642                 /* Append bytes to output buffer. */
    643                 assert(PyString_Check(w));
    644                 r = PyString_AS_STRING(w);
    645                 rn = PyString_GET_SIZE(w);
    646                 Py_MEMCPY(p, r, rn);
    647                 p += rn;
    648                 Py_DECREF(w);
    649                 s = t;
    650             } else {
    651                 *p++ = *s++;
    652             }
    653 #else
    654             *p++ = *s++;
    655 #endif
    656             continue;
    657         }
    658         s++;
    659         if (s==end) {
    660             PyErr_SetString(PyExc_ValueError,
    661                             "Trailing \\ in string");
    662             goto failed;
    663         }
    664         switch (*s++) {
    665         /* XXX This assumes ASCII! */
    666         case '\n': break;
    667         case '\\': *p++ = '\\'; break;
    668         case '\'': *p++ = '\''; break;
    669         case '\"': *p++ = '\"'; break;
    670         case 'b': *p++ = '\b'; break;
    671         case 'f': *p++ = '\014'; break; /* FF */
    672         case 't': *p++ = '\t'; break;
    673         case 'n': *p++ = '\n'; break;
    674         case 'r': *p++ = '\r'; break;
    675         case 'v': *p++ = '\013'; break; /* VT */
    676         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
    677         case '0': case '1': case '2': case '3':
    678         case '4': case '5': case '6': case '7':
    679             c = s[-1] - '0';
    680             if (s < end && '0' <= *s && *s <= '7') {
    681                 c = (c<<3) + *s++ - '0';
    682                 if (s < end && '0' <= *s && *s <= '7')
    683                     c = (c<<3) + *s++ - '0';
    684             }
    685             *p++ = c;
    686             break;
    687         case 'x':
    688             if (s+1 < end &&
    689                 isxdigit(Py_CHARMASK(s[0])) &&
    690                 isxdigit(Py_CHARMASK(s[1])))
    691             {
    692                 unsigned int x = 0;
    693                 c = Py_CHARMASK(*s);
    694                 s++;
    695                 if (isdigit(c))
    696                     x = c - '0';
    697                 else if (islower(c))
    698                     x = 10 + c - 'a';
    699                 else
    700                     x = 10 + c - 'A';
    701                 x = x << 4;
    702                 c = Py_CHARMASK(*s);
    703                 s++;
    704                 if (isdigit(c))
    705                     x += c - '0';
    706                 else if (islower(c))
    707                     x += 10 + c - 'a';
    708                 else
    709                     x += 10 + c - 'A';
    710                 *p++ = x;
    711                 break;
    712             }
    713             if (!errors || strcmp(errors, "strict") == 0) {
    714                 PyErr_SetString(PyExc_ValueError,
    715                                 "invalid \\x escape");
    716                 goto failed;
    717             }
    718             if (strcmp(errors, "replace") == 0) {
    719                 *p++ = '?';
    720             } else if (strcmp(errors, "ignore") == 0)
    721                 /* do nothing */;
    722             else {
    723                 PyErr_Format(PyExc_ValueError,
    724                              "decoding error; "
    725                              "unknown error handling code: %.400s",
    726                              errors);
    727                 goto failed;
    728             }
    729 #ifndef Py_USING_UNICODE
    730         case 'u':
    731         case 'U':
    732         case 'N':
    733             if (unicode) {
    734                 PyErr_SetString(PyExc_ValueError,
    735                           "Unicode escapes not legal "
    736                           "when Unicode disabled");
    737                 goto failed;
    738             }
    739 #endif
    740         default:
    741             *p++ = '\\';
    742             s--;
    743             goto non_esc; /* an arbitrary number of unescaped
    744                              UTF-8 bytes may follow. */
    745         }
    746     }
    747     if (p-buf < newlen && _PyString_Resize(&v, p - buf))
    748         goto failed;
    749     return v;
    750   failed:
    751     Py_DECREF(v);
    752     return NULL;
    753 }
    754 
    755 /* -------------------------------------------------------------------- */
    756 /* object api */
    757 
    758 static Py_ssize_t
    759 string_getsize(register PyObject *op)
    760 {
    761     char *s;
    762     Py_ssize_t len;
    763     if (PyString_AsStringAndSize(op, &s, &len))
    764         return -1;
    765     return len;
    766 }
    767 
    768 static /*const*/ char *
    769 string_getbuffer(register PyObject *op)
    770 {
    771     char *s;
    772     Py_ssize_t len;
    773     if (PyString_AsStringAndSize(op, &s, &len))
    774         return NULL;
    775     return s;
    776 }
    777 
    778 Py_ssize_t
    779 PyString_Size(register PyObject *op)
    780 {
    781     if (!PyString_Check(op))
    782         return string_getsize(op);
    783     return Py_SIZE(op);
    784 }
    785 
    786 /*const*/ char *
    787 PyString_AsString(register PyObject *op)
    788 {
    789     if (!PyString_Check(op))
    790         return string_getbuffer(op);
    791     return ((PyStringObject *)op) -> ob_sval;
    792 }
    793 
    794 int
    795 PyString_AsStringAndSize(register PyObject *obj,
    796                          register char **s,
    797                          register Py_ssize_t *len)
    798 {
    799     if (s == NULL) {
    800         PyErr_BadInternalCall();
    801         return -1;
    802     }
    803 
    804     if (!PyString_Check(obj)) {
    805 #ifdef Py_USING_UNICODE
    806         if (PyUnicode_Check(obj)) {
    807             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
    808             if (obj == NULL)
    809                 return -1;
    810         }
    811         else
    812 #endif
    813         {
    814             PyErr_Format(PyExc_TypeError,
    815                          "expected string or Unicode object, "
    816                          "%.200s found", Py_TYPE(obj)->tp_name);
    817             return -1;
    818         }
    819     }
    820 
    821     *s = PyString_AS_STRING(obj);
    822     if (len != NULL)
    823         *len = PyString_GET_SIZE(obj);
    824     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
    825         PyErr_SetString(PyExc_TypeError,
    826                         "expected string without null bytes");
    827         return -1;
    828     }
    829     return 0;
    830 }
    831 
    832 /* -------------------------------------------------------------------- */
    833 /* Methods */
    834 
    835 #include "stringlib/stringdefs.h"
    836 #include "stringlib/fastsearch.h"
    837 
    838 #include "stringlib/count.h"
    839 #include "stringlib/find.h"
    840 #include "stringlib/partition.h"
    841 #include "stringlib/split.h"
    842 
    843 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
    844 #include "stringlib/localeutil.h"
    845 
    846 
    847 
    848 static int
    849 string_print(PyStringObject *op, FILE *fp, int flags)
    850 {
    851     Py_ssize_t i, str_len;
    852     char c;
    853     int quote;
    854 
    855     /* XXX Ought to check for interrupts when writing long strings */
    856     if (! PyString_CheckExact(op)) {
    857         int ret;
    858         /* A str subclass may have its own __str__ method. */
    859         op = (PyStringObject *) PyObject_Str((PyObject *)op);
    860         if (op == NULL)
    861             return -1;
    862         ret = string_print(op, fp, flags);
    863         Py_DECREF(op);
    864         return ret;
    865     }
    866     if (flags & Py_PRINT_RAW) {
    867         char *data = op->ob_sval;
    868         Py_ssize_t size = Py_SIZE(op);
    869         Py_BEGIN_ALLOW_THREADS
    870         while (size > INT_MAX) {
    871             /* Very long strings cannot be written atomically.
    872              * But don't write exactly INT_MAX bytes at a time
    873              * to avoid memory aligment issues.
    874              */
    875             const int chunk_size = INT_MAX & ~0x3FFF;
    876             fwrite(data, 1, chunk_size, fp);
    877             data += chunk_size;
    878             size -= chunk_size;
    879         }
    880 #ifdef __VMS
    881         if (size) fwrite(data, (int)size, 1, fp);
    882 #else
    883         fwrite(data, 1, (int)size, fp);
    884 #endif
    885         Py_END_ALLOW_THREADS
    886         return 0;
    887     }
    888 
    889     /* figure out which quote to use; single is preferred */
    890     quote = '\'';
    891     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
    892         !memchr(op->ob_sval, '"', Py_SIZE(op)))
    893         quote = '"';
    894 
    895     str_len = Py_SIZE(op);
    896     Py_BEGIN_ALLOW_THREADS
    897     fputc(quote, fp);
    898     for (i = 0; i < str_len; i++) {
    899         /* Since strings are immutable and the caller should have a
    900         reference, accessing the interal buffer should not be an issue
    901         with the GIL released. */
    902         c = op->ob_sval[i];
    903         if (c == quote || c == '\\')
    904             fprintf(fp, "\\%c", c);
    905         else if (c == '\t')
    906             fprintf(fp, "\\t");
    907         else if (c == '\n')
    908             fprintf(fp, "\\n");
    909         else if (c == '\r')
    910             fprintf(fp, "\\r");
    911         else if (c < ' ' || c >= 0x7f)
    912             fprintf(fp, "\\x%02x", c & 0xff);
    913         else
    914             fputc(c, fp);
    915     }
    916     fputc(quote, fp);
    917     Py_END_ALLOW_THREADS
    918     return 0;
    919 }
    920 
    921 PyObject *
    922 PyString_Repr(PyObject *obj, int smartquotes)
    923 {
    924     register PyStringObject* op = (PyStringObject*) obj;
    925     size_t newsize = 2 + 4 * Py_SIZE(op);
    926     PyObject *v;
    927     if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
    928         PyErr_SetString(PyExc_OverflowError,
    929             "string is too large to make repr");
    930         return NULL;
    931     }
    932     v = PyString_FromStringAndSize((char *)NULL, newsize);
    933     if (v == NULL) {
    934         return NULL;
    935     }
    936     else {
    937         register Py_ssize_t i;
    938         register char c;
    939         register char *p;
    940         int quote;
    941 
    942         /* figure out which quote to use; single is preferred */
    943         quote = '\'';
    944         if (smartquotes &&
    945             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
    946             !memchr(op->ob_sval, '"', Py_SIZE(op)))
    947             quote = '"';
    948 
    949         p = PyString_AS_STRING(v);
    950         *p++ = quote;
    951         for (i = 0; i < Py_SIZE(op); i++) {
    952             /* There's at least enough room for a hex escape
    953                and a closing quote. */
    954             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
    955             c = op->ob_sval[i];
    956             if (c == quote || c == '\\')
    957                 *p++ = '\\', *p++ = c;
    958             else if (c == '\t')
    959                 *p++ = '\\', *p++ = 't';
    960             else if (c == '\n')
    961                 *p++ = '\\', *p++ = 'n';
    962             else if (c == '\r')
    963                 *p++ = '\\', *p++ = 'r';
    964             else if (c < ' ' || c >= 0x7f) {
    965                 /* For performance, we don't want to call
    966                    PyOS_snprintf here (extra layers of
    967                    function call). */
    968                 sprintf(p, "\\x%02x", c & 0xff);
    969                 p += 4;
    970             }
    971             else
    972                 *p++ = c;
    973         }
    974         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
    975         *p++ = quote;
    976         *p = '\0';
    977         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
    978             return NULL;
    979         return v;
    980     }
    981 }
    982 
    983 static PyObject *
    984 string_repr(PyObject *op)
    985 {
    986     return PyString_Repr(op, 1);
    987 }
    988 
    989 static PyObject *
    990 string_str(PyObject *s)
    991 {
    992     assert(PyString_Check(s));
    993     if (PyString_CheckExact(s)) {
    994         Py_INCREF(s);
    995         return s;
    996     }
    997     else {
    998         /* Subtype -- return genuine string with the same value. */
    999         PyStringObject *t = (PyStringObject *) s;
   1000         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
   1001     }
   1002 }
   1003 
   1004 static Py_ssize_t
   1005 string_length(PyStringObject *a)
   1006 {
   1007     return Py_SIZE(a);
   1008 }
   1009 
   1010 static PyObject *
   1011 string_concat(register PyStringObject *a, register PyObject *bb)
   1012 {
   1013     register Py_ssize_t size;
   1014     register PyStringObject *op;
   1015     if (!PyString_Check(bb)) {
   1016 #ifdef Py_USING_UNICODE
   1017         if (PyUnicode_Check(bb))
   1018             return PyUnicode_Concat((PyObject *)a, bb);
   1019 #endif
   1020         if (PyByteArray_Check(bb))
   1021             return PyByteArray_Concat((PyObject *)a, bb);
   1022         PyErr_Format(PyExc_TypeError,
   1023                      "cannot concatenate 'str' and '%.200s' objects",
   1024                      Py_TYPE(bb)->tp_name);
   1025         return NULL;
   1026     }
   1027 #define b ((PyStringObject *)bb)
   1028     /* Optimize cases with empty left or right operand */
   1029     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
   1030         PyString_CheckExact(a) && PyString_CheckExact(b)) {
   1031         if (Py_SIZE(a) == 0) {
   1032             Py_INCREF(bb);
   1033             return bb;
   1034         }
   1035         Py_INCREF(a);
   1036         return (PyObject *)a;
   1037     }
   1038     size = Py_SIZE(a) + Py_SIZE(b);
   1039     /* Check that string sizes are not negative, to prevent an
   1040        overflow in cases where we are passed incorrectly-created
   1041        strings with negative lengths (due to a bug in other code).
   1042     */
   1043     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
   1044         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
   1045         PyErr_SetString(PyExc_OverflowError,
   1046                         "strings are too large to concat");
   1047         return NULL;
   1048     }
   1049 
   1050     /* Inline PyObject_NewVar */
   1051     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
   1052         PyErr_SetString(PyExc_OverflowError,
   1053                         "strings are too large to concat");
   1054         return NULL;
   1055     }
   1056     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
   1057     if (op == NULL)
   1058         return PyErr_NoMemory();
   1059     PyObject_INIT_VAR(op, &PyString_Type, size);
   1060     op->ob_shash = -1;
   1061     op->ob_sstate = SSTATE_NOT_INTERNED;
   1062     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
   1063     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
   1064     op->ob_sval[size] = '\0';
   1065     return (PyObject *) op;
   1066 #undef b
   1067 }
   1068 
   1069 static PyObject *
   1070 string_repeat(register PyStringObject *a, register Py_ssize_t n)
   1071 {
   1072     register Py_ssize_t i;
   1073     register Py_ssize_t j;
   1074     register Py_ssize_t size;
   1075     register PyStringObject *op;
   1076     size_t nbytes;
   1077     if (n < 0)
   1078         n = 0;
   1079     /* watch out for overflows:  the size can overflow int,
   1080      * and the # of bytes needed can overflow size_t
   1081      */
   1082     size = Py_SIZE(a) * n;
   1083     if (n && size / n != Py_SIZE(a)) {
   1084         PyErr_SetString(PyExc_OverflowError,
   1085             "repeated string is too long");
   1086         return NULL;
   1087     }
   1088     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
   1089         Py_INCREF(a);
   1090         return (PyObject *)a;
   1091     }
   1092     nbytes = (size_t)size;
   1093     if (nbytes + PyStringObject_SIZE <= nbytes) {
   1094         PyErr_SetString(PyExc_OverflowError,
   1095             "repeated string is too long");
   1096         return NULL;
   1097     }
   1098     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
   1099     if (op == NULL)
   1100         return PyErr_NoMemory();
   1101     PyObject_INIT_VAR(op, &PyString_Type, size);
   1102     op->ob_shash = -1;
   1103     op->ob_sstate = SSTATE_NOT_INTERNED;
   1104     op->ob_sval[size] = '\0';
   1105     if (Py_SIZE(a) == 1 && n > 0) {
   1106         memset(op->ob_sval, a->ob_sval[0] , n);
   1107         return (PyObject *) op;
   1108     }
   1109     i = 0;
   1110     if (i < size) {
   1111         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
   1112         i = Py_SIZE(a);
   1113     }
   1114     while (i < size) {
   1115         j = (i <= size-i)  ?  i  :  size-i;
   1116         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
   1117         i += j;
   1118     }
   1119     return (PyObject *) op;
   1120 }
   1121 
   1122 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
   1123 
   1124 static PyObject *
   1125 string_slice(register PyStringObject *a, register Py_ssize_t i,
   1126              register Py_ssize_t j)
   1127      /* j -- may be negative! */
   1128 {
   1129     if (i < 0)
   1130         i = 0;
   1131     if (j < 0)
   1132         j = 0; /* Avoid signed/unsigned bug in next line */
   1133     if (j > Py_SIZE(a))
   1134         j = Py_SIZE(a);
   1135     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
   1136         /* It's the same as a */
   1137         Py_INCREF(a);
   1138         return (PyObject *)a;
   1139     }
   1140     if (j < i)
   1141         j = i;
   1142     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
   1143 }
   1144 
   1145 static int
   1146 string_contains(PyObject *str_obj, PyObject *sub_obj)
   1147 {
   1148     if (!PyString_CheckExact(sub_obj)) {
   1149 #ifdef Py_USING_UNICODE
   1150         if (PyUnicode_Check(sub_obj))
   1151             return PyUnicode_Contains(str_obj, sub_obj);
   1152 #endif
   1153         if (!PyString_Check(sub_obj)) {
   1154             PyErr_Format(PyExc_TypeError,
   1155                 "'in <string>' requires string as left operand, "
   1156                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
   1157             return -1;
   1158         }
   1159     }
   1160 
   1161     return stringlib_contains_obj(str_obj, sub_obj);
   1162 }
   1163 
   1164 static PyObject *
   1165 string_item(PyStringObject *a, register Py_ssize_t i)
   1166 {
   1167     char pchar;
   1168     PyObject *v;
   1169     if (i < 0 || i >= Py_SIZE(a)) {
   1170         PyErr_SetString(PyExc_IndexError, "string index out of range");
   1171         return NULL;
   1172     }
   1173     pchar = a->ob_sval[i];
   1174     v = (PyObject *)characters[pchar & UCHAR_MAX];
   1175     if (v == NULL)
   1176         v = PyString_FromStringAndSize(&pchar, 1);
   1177     else {
   1178 #ifdef COUNT_ALLOCS
   1179         one_strings++;
   1180 #endif
   1181         Py_INCREF(v);
   1182     }
   1183     return v;
   1184 }
   1185 
   1186 static PyObject*
   1187 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
   1188 {
   1189     int c;
   1190     Py_ssize_t len_a, len_b;
   1191     Py_ssize_t min_len;
   1192     PyObject *result;
   1193 
   1194     /* Make sure both arguments are strings. */
   1195     if (!(PyString_Check(a) && PyString_Check(b))) {
   1196         result = Py_NotImplemented;
   1197         goto out;
   1198     }
   1199     if (a == b) {
   1200         switch (op) {
   1201         case Py_EQ:case Py_LE:case Py_GE:
   1202             result = Py_True;
   1203             goto out;
   1204         case Py_NE:case Py_LT:case Py_GT:
   1205             result = Py_False;
   1206             goto out;
   1207         }
   1208     }
   1209     if (op == Py_EQ) {
   1210         /* Supporting Py_NE here as well does not save
   1211            much time, since Py_NE is rarely used.  */
   1212         if (Py_SIZE(a) == Py_SIZE(b)
   1213             && (a->ob_sval[0] == b->ob_sval[0]
   1214             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
   1215             result = Py_True;
   1216         } else {
   1217             result = Py_False;
   1218         }
   1219         goto out;
   1220     }
   1221     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
   1222     min_len = (len_a < len_b) ? len_a : len_b;
   1223     if (min_len > 0) {
   1224         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
   1225         if (c==0)
   1226             c = memcmp(a->ob_sval, b->ob_sval, min_len);
   1227     } else
   1228         c = 0;
   1229     if (c == 0)
   1230         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
   1231     switch (op) {
   1232     case Py_LT: c = c <  0; break;
   1233     case Py_LE: c = c <= 0; break;
   1234     case Py_EQ: assert(0);  break; /* unreachable */
   1235     case Py_NE: c = c != 0; break;
   1236     case Py_GT: c = c >  0; break;
   1237     case Py_GE: c = c >= 0; break;
   1238     default:
   1239         result = Py_NotImplemented;
   1240         goto out;
   1241     }
   1242     result = c ? Py_True : Py_False;
   1243   out:
   1244     Py_INCREF(result);
   1245     return result;
   1246 }
   1247 
   1248 int
   1249 _PyString_Eq(PyObject *o1, PyObject *o2)
   1250 {
   1251     PyStringObject *a = (PyStringObject*) o1;
   1252     PyStringObject *b = (PyStringObject*) o2;
   1253     return Py_SIZE(a) == Py_SIZE(b)
   1254       && *a->ob_sval == *b->ob_sval
   1255       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
   1256 }
   1257 
   1258 static long
   1259 string_hash(PyStringObject *a)
   1260 {
   1261     register Py_ssize_t len;
   1262     register unsigned char *p;
   1263     register long x;
   1264 
   1265     if (a->ob_shash != -1)
   1266         return a->ob_shash;
   1267     len = Py_SIZE(a);
   1268     p = (unsigned char *) a->ob_sval;
   1269     x = *p << 7;
   1270     while (--len >= 0)
   1271         x = (1000003*x) ^ *p++;
   1272     x ^= Py_SIZE(a);
   1273     if (x == -1)
   1274         x = -2;
   1275     a->ob_shash = x;
   1276     return x;
   1277 }
   1278 
   1279 static PyObject*
   1280 string_subscript(PyStringObject* self, PyObject* item)
   1281 {
   1282     if (PyIndex_Check(item)) {
   1283         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
   1284         if (i == -1 && PyErr_Occurred())
   1285             return NULL;
   1286         if (i < 0)
   1287             i += PyString_GET_SIZE(self);
   1288         return string_item(self, i);
   1289     }
   1290     else if (PySlice_Check(item)) {
   1291         Py_ssize_t start, stop, step, slicelength, cur, i;
   1292         char* source_buf;
   1293         char* result_buf;
   1294         PyObject* result;
   1295 
   1296         if (PySlice_GetIndicesEx((PySliceObject*)item,
   1297                          PyString_GET_SIZE(self),
   1298                          &start, &stop, &step, &slicelength) < 0) {
   1299             return NULL;
   1300         }
   1301 
   1302         if (slicelength <= 0) {
   1303             return PyString_FromStringAndSize("", 0);
   1304         }
   1305         else if (start == 0 && step == 1 &&
   1306                  slicelength == PyString_GET_SIZE(self) &&
   1307                  PyString_CheckExact(self)) {
   1308             Py_INCREF(self);
   1309             return (PyObject *)self;
   1310         }
   1311         else if (step == 1) {
   1312             return PyString_FromStringAndSize(
   1313                 PyString_AS_STRING(self) + start,
   1314                 slicelength);
   1315         }
   1316         else {
   1317             source_buf = PyString_AsString((PyObject*)self);
   1318             result_buf = (char *)PyMem_Malloc(slicelength);
   1319             if (result_buf == NULL)
   1320                 return PyErr_NoMemory();
   1321 
   1322             for (cur = start, i = 0; i < slicelength;
   1323                  cur += step, i++) {
   1324                 result_buf[i] = source_buf[cur];
   1325             }
   1326 
   1327             result = PyString_FromStringAndSize(result_buf,
   1328                                                 slicelength);
   1329             PyMem_Free(result_buf);
   1330             return result;
   1331         }
   1332     }
   1333     else {
   1334         PyErr_Format(PyExc_TypeError,
   1335                      "string indices must be integers, not %.200s",
   1336                      Py_TYPE(item)->tp_name);
   1337         return NULL;
   1338     }
   1339 }
   1340 
   1341 static Py_ssize_t
   1342 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
   1343 {
   1344     if ( index != 0 ) {
   1345         PyErr_SetString(PyExc_SystemError,
   1346                         "accessing non-existent string segment");
   1347         return -1;
   1348     }
   1349     *ptr = (void *)self->ob_sval;
   1350     return Py_SIZE(self);
   1351 }
   1352 
   1353 static Py_ssize_t
   1354 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
   1355 {
   1356     PyErr_SetString(PyExc_TypeError,
   1357                     "Cannot use string as modifiable buffer");
   1358     return -1;
   1359 }
   1360 
   1361 static Py_ssize_t
   1362 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
   1363 {
   1364     if ( lenp )
   1365         *lenp = Py_SIZE(self);
   1366     return 1;
   1367 }
   1368 
   1369 static Py_ssize_t
   1370 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
   1371 {
   1372     if ( index != 0 ) {
   1373         PyErr_SetString(PyExc_SystemError,
   1374                         "accessing non-existent string segment");
   1375         return -1;
   1376     }
   1377     *ptr = self->ob_sval;
   1378     return Py_SIZE(self);
   1379 }
   1380 
   1381 static int
   1382 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
   1383 {
   1384     return PyBuffer_FillInfo(view, (PyObject*)self,
   1385                              (void *)self->ob_sval, Py_SIZE(self),
   1386                              1, flags);
   1387 }
   1388 
   1389 static PySequenceMethods string_as_sequence = {
   1390     (lenfunc)string_length, /*sq_length*/
   1391     (binaryfunc)string_concat, /*sq_concat*/
   1392     (ssizeargfunc)string_repeat, /*sq_repeat*/
   1393     (ssizeargfunc)string_item, /*sq_item*/
   1394     (ssizessizeargfunc)string_slice, /*sq_slice*/
   1395     0,                  /*sq_ass_item*/
   1396     0,                  /*sq_ass_slice*/
   1397     (objobjproc)string_contains /*sq_contains*/
   1398 };
   1399 
   1400 static PyMappingMethods string_as_mapping = {
   1401     (lenfunc)string_length,
   1402     (binaryfunc)string_subscript,
   1403     0,
   1404 };
   1405 
   1406 static PyBufferProcs string_as_buffer = {
   1407     (readbufferproc)string_buffer_getreadbuf,
   1408     (writebufferproc)string_buffer_getwritebuf,
   1409     (segcountproc)string_buffer_getsegcount,
   1410     (charbufferproc)string_buffer_getcharbuf,
   1411     (getbufferproc)string_buffer_getbuffer,
   1412     0, /* XXX */
   1413 };
   1414 
   1415 
   1416 
   /* Selectors for the strip variants; they double as indexes into
      stripformat[] below. */
   #define LEFTSTRIP 0
   #define RIGHTSTRIP 1
   #define BOTHSTRIP 2

   /* Arrays indexed by above.  Each entry has the shape "|O:<name>". */
   static const char *stripformat[] = {
       "|O:lstrip",
       "|O:rstrip",
       "|O:strip"
   };

   /* Skip the three-character "|O:" prefix, leaving just the method name. */
   #define STRIPNAME(i) (stripformat[i]+3)
   1425 
   1426 PyDoc_STRVAR(split__doc__,
   1427 "S.split([sep [,maxsplit]]) -> list of strings\n\
   1428 \n\
   1429 Return a list of the words in the string S, using sep as the\n\
   1430 delimiter string.  If maxsplit is given, at most maxsplit\n\
   1431 splits are done. If sep is not specified or is None, any\n\
   1432 whitespace string is a separator and empty strings are removed\n\
   1433 from the result.");
   1434 
   1435 static PyObject *
   1436 string_split(PyStringObject *self, PyObject *args)
   1437 {
   1438     Py_ssize_t len = PyString_GET_SIZE(self), n;
   1439     Py_ssize_t maxsplit = -1;
   1440     const char *s = PyString_AS_STRING(self), *sub;
   1441     PyObject *subobj = Py_None;
   1442 
   1443     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
   1444         return NULL;
   1445     if (maxsplit < 0)
   1446         maxsplit = PY_SSIZE_T_MAX;
   1447     if (subobj == Py_None)
   1448         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
   1449     if (PyString_Check(subobj)) {
   1450         sub = PyString_AS_STRING(subobj);
   1451         n = PyString_GET_SIZE(subobj);
   1452     }
   1453 #ifdef Py_USING_UNICODE
   1454     else if (PyUnicode_Check(subobj))
   1455         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
   1456 #endif
   1457     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
   1458         return NULL;
   1459 
   1460     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
   1461 }
   1462 
   1463 PyDoc_STRVAR(partition__doc__,
   1464 "S.partition(sep) -> (head, sep, tail)\n\
   1465 \n\
   1466 Search for the separator sep in S, and return the part before it,\n\
   1467 the separator itself, and the part after it.  If the separator is not\n\
   1468 found, return S and two empty strings.");
   1469 
   1470 static PyObject *
   1471 string_partition(PyStringObject *self, PyObject *sep_obj)
   1472 {
   1473     const char *sep;
   1474     Py_ssize_t sep_len;
   1475 
   1476     if (PyString_Check(sep_obj)) {
   1477         sep = PyString_AS_STRING(sep_obj);
   1478         sep_len = PyString_GET_SIZE(sep_obj);
   1479     }
   1480 #ifdef Py_USING_UNICODE
   1481     else if (PyUnicode_Check(sep_obj))
   1482         return PyUnicode_Partition((PyObject *) self, sep_obj);
   1483 #endif
   1484     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
   1485         return NULL;
   1486 
   1487     return stringlib_partition(
   1488         (PyObject*) self,
   1489         PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1490         sep_obj, sep, sep_len
   1491         );
   1492 }
   1493 
   1494 PyDoc_STRVAR(rpartition__doc__,
   1495 "S.rpartition(sep) -> (head, sep, tail)\n\
   1496 \n\
   1497 Search for the separator sep in S, starting at the end of S, and return\n\
   1498 the part before it, the separator itself, and the part after it.  If the\n\
   1499 separator is not found, return two empty strings and S.");
   1500 
   1501 static PyObject *
   1502 string_rpartition(PyStringObject *self, PyObject *sep_obj)
   1503 {
   1504     const char *sep;
   1505     Py_ssize_t sep_len;
   1506 
   1507     if (PyString_Check(sep_obj)) {
   1508         sep = PyString_AS_STRING(sep_obj);
   1509         sep_len = PyString_GET_SIZE(sep_obj);
   1510     }
   1511 #ifdef Py_USING_UNICODE
   1512     else if (PyUnicode_Check(sep_obj))
   1513         return PyUnicode_RPartition((PyObject *) self, sep_obj);
   1514 #endif
   1515     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
   1516         return NULL;
   1517 
   1518     return stringlib_rpartition(
   1519         (PyObject*) self,
   1520         PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1521         sep_obj, sep, sep_len
   1522         );
   1523 }
   1524 
   1525 PyDoc_STRVAR(rsplit__doc__,
   1526 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
   1527 \n\
   1528 Return a list of the words in the string S, using sep as the\n\
   1529 delimiter string, starting at the end of the string and working\n\
   1530 to the front.  If maxsplit is given, at most maxsplit splits are\n\
   1531 done. If sep is not specified or is None, any whitespace string\n\
   1532 is a separator.");
   1533 
   1534 static PyObject *
   1535 string_rsplit(PyStringObject *self, PyObject *args)
   1536 {
   1537     Py_ssize_t len = PyString_GET_SIZE(self), n;
   1538     Py_ssize_t maxsplit = -1;
   1539     const char *s = PyString_AS_STRING(self), *sub;
   1540     PyObject *subobj = Py_None;
   1541 
   1542     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
   1543         return NULL;
   1544     if (maxsplit < 0)
   1545         maxsplit = PY_SSIZE_T_MAX;
   1546     if (subobj == Py_None)
   1547         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
   1548     if (PyString_Check(subobj)) {
   1549         sub = PyString_AS_STRING(subobj);
   1550         n = PyString_GET_SIZE(subobj);
   1551     }
   1552 #ifdef Py_USING_UNICODE
   1553     else if (PyUnicode_Check(subobj))
   1554         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
   1555 #endif
   1556     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
   1557         return NULL;
   1558 
   1559     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
   1560 }
   1561 
   1562 
   1563 PyDoc_STRVAR(join__doc__,
   1564 "S.join(iterable) -> string\n\
   1565 \n\
   1566 Return a string which is the concatenation of the strings in the\n\
   1567 iterable.  The separator between elements is S.");
   1568 
   1569 static PyObject *
   1570 string_join(PyStringObject *self, PyObject *orig)
   1571 {
   1572     char *sep = PyString_AS_STRING(self);
   1573     const Py_ssize_t seplen = PyString_GET_SIZE(self);
   1574     PyObject *res = NULL;
   1575     char *p;
   1576     Py_ssize_t seqlen = 0;
   1577     size_t sz = 0;
   1578     Py_ssize_t i;
   1579     PyObject *seq, *item;
   1580 
   1581     seq = PySequence_Fast(orig, "");
   1582     if (seq == NULL) {
   1583         return NULL;
   1584     }
   1585 
   1586     seqlen = PySequence_Size(seq);
   1587     if (seqlen == 0) {
   1588         Py_DECREF(seq);
   1589         return PyString_FromString("");
   1590     }
   1591     if (seqlen == 1) {
   1592         item = PySequence_Fast_GET_ITEM(seq, 0);
   1593         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
   1594             Py_INCREF(item);
   1595             Py_DECREF(seq);
   1596             return item;
   1597         }
   1598     }
   1599 
   1600     /* There are at least two things to join, or else we have a subclass
   1601      * of the builtin types in the sequence.
   1602      * Do a pre-pass to figure out the total amount of space we'll
   1603      * need (sz), see whether any argument is absurd, and defer to
   1604      * the Unicode join if appropriate.
   1605      */
   1606     for (i = 0; i < seqlen; i++) {
   1607         const size_t old_sz = sz;
   1608         item = PySequence_Fast_GET_ITEM(seq, i);
   1609         if (!PyString_Check(item)){
   1610 #ifdef Py_USING_UNICODE
   1611             if (PyUnicode_Check(item)) {
   1612                 /* Defer to Unicode join.
   1613                  * CAUTION:  There's no gurantee that the
   1614                  * original sequence can be iterated over
   1615                  * again, so we must pass seq here.
   1616                  */
   1617                 PyObject *result;
   1618                 result = PyUnicode_Join((PyObject *)self, seq);
   1619                 Py_DECREF(seq);
   1620                 return result;
   1621             }
   1622 #endif
   1623             PyErr_Format(PyExc_TypeError,
   1624                          "sequence item %zd: expected string,"
   1625                          " %.80s found",
   1626                          i, Py_TYPE(item)->tp_name);
   1627             Py_DECREF(seq);
   1628             return NULL;
   1629         }
   1630         sz += PyString_GET_SIZE(item);
   1631         if (i != 0)
   1632             sz += seplen;
   1633         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
   1634             PyErr_SetString(PyExc_OverflowError,
   1635                 "join() result is too long for a Python string");
   1636             Py_DECREF(seq);
   1637             return NULL;
   1638         }
   1639     }
   1640 
   1641     /* Allocate result space. */
   1642     res = PyString_FromStringAndSize((char*)NULL, sz);
   1643     if (res == NULL) {
   1644         Py_DECREF(seq);
   1645         return NULL;
   1646     }
   1647 
   1648     /* Catenate everything. */
   1649     p = PyString_AS_STRING(res);
   1650     for (i = 0; i < seqlen; ++i) {
   1651         size_t n;
   1652         item = PySequence_Fast_GET_ITEM(seq, i);
   1653         n = PyString_GET_SIZE(item);
   1654         Py_MEMCPY(p, PyString_AS_STRING(item), n);
   1655         p += n;
   1656         if (i < seqlen - 1) {
   1657             Py_MEMCPY(p, sep, seplen);
   1658             p += seplen;
   1659         }
   1660     }
   1661 
   1662     Py_DECREF(seq);
   1663     return res;
   1664 }
   1665 
   1666 PyObject *
   1667 _PyString_Join(PyObject *sep, PyObject *x)
   1668 {
   1669     assert(sep != NULL && PyString_Check(sep));
   1670     assert(x != NULL);
   1671     return string_join((PyStringObject *)sep, x);
   1672 }
   1673 
   /* helper macro to fixup start/end slice values.

      `end` is capped at `len`; a negative `end` or `start` is offset by
      `len` and then floored at 0.  `start` and `end` must be modifiable
      lvalues, and every argument is evaluated more than once, so pass only
      side-effect-free expressions.

      Each parameter is parenthesized so that compound argument expressions
      keep their meaning.  The expansion is still a bare statement sequence
      (two `if` statements), so do not use it as the unbraced body of
      another if/else or loop. */
   #define ADJUST_INDICES(start, end, len)         \
       if ((end) > (len))                          \
           (end) = (len);                          \
       else if ((end) < 0) {                       \
           (end) += (len);                         \
           if ((end) < 0)                          \
               (end) = 0;                          \
       }                                           \
       if ((start) < 0) {                          \
           (start) += (len);                       \
           if ((start) < 0)                        \
               (start) = 0;                        \
       }
   1688 
   1689 Py_LOCAL_INLINE(Py_ssize_t)
   1690 string_find_internal(PyStringObject *self, PyObject *args, int dir)
   1691 {
   1692     PyObject *subobj;
   1693     const char *sub;
   1694     Py_ssize_t sub_len;
   1695     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
   1696 
   1697     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
   1698                                     args, &subobj, &start, &end))
   1699         return -2;
   1700 
   1701     if (PyString_Check(subobj)) {
   1702         sub = PyString_AS_STRING(subobj);
   1703         sub_len = PyString_GET_SIZE(subobj);
   1704     }
   1705 #ifdef Py_USING_UNICODE
   1706     else if (PyUnicode_Check(subobj))
   1707         return PyUnicode_Find(
   1708             (PyObject *)self, subobj, start, end, dir);
   1709 #endif
   1710     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
   1711         /* XXX - the "expected a character buffer object" is pretty
   1712            confusing for a non-expert.  remap to something else ? */
   1713         return -2;
   1714 
   1715     if (dir > 0)
   1716         return stringlib_find_slice(
   1717             PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1718             sub, sub_len, start, end);
   1719     else
   1720         return stringlib_rfind_slice(
   1721             PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1722             sub, sub_len, start, end);
   1723 }
   1724 
   1725 
   1726 PyDoc_STRVAR(find__doc__,
   1727 "S.find(sub [,start [,end]]) -> int\n\
   1728 \n\
   1729 Return the lowest index in S where substring sub is found,\n\
   1730 such that sub is contained within s[start:end].  Optional\n\
   1731 arguments start and end are interpreted as in slice notation.\n\
   1732 \n\
   1733 Return -1 on failure.");
   1734 
   1735 static PyObject *
   1736 string_find(PyStringObject *self, PyObject *args)
   1737 {
   1738     Py_ssize_t result = string_find_internal(self, args, +1);
   1739     if (result == -2)
   1740         return NULL;
   1741     return PyInt_FromSsize_t(result);
   1742 }
   1743 
   1744 
   1745 PyDoc_STRVAR(index__doc__,
   1746 "S.index(sub [,start [,end]]) -> int\n\
   1747 \n\
   1748 Like S.find() but raise ValueError when the substring is not found.");
   1749 
   1750 static PyObject *
   1751 string_index(PyStringObject *self, PyObject *args)
   1752 {
   1753     Py_ssize_t result = string_find_internal(self, args, +1);
   1754     if (result == -2)
   1755         return NULL;
   1756     if (result == -1) {
   1757         PyErr_SetString(PyExc_ValueError,
   1758                         "substring not found");
   1759         return NULL;
   1760     }
   1761     return PyInt_FromSsize_t(result);
   1762 }
   1763 
   1764 
   1765 PyDoc_STRVAR(rfind__doc__,
   1766 "S.rfind(sub [,start [,end]]) -> int\n\
   1767 \n\
   1768 Return the highest index in S where substring sub is found,\n\
   1769 such that sub is contained within s[start:end].  Optional\n\
   1770 arguments start and end are interpreted as in slice notation.\n\
   1771 \n\
   1772 Return -1 on failure.");
   1773 
   1774 static PyObject *
   1775 string_rfind(PyStringObject *self, PyObject *args)
   1776 {
   1777     Py_ssize_t result = string_find_internal(self, args, -1);
   1778     if (result == -2)
   1779         return NULL;
   1780     return PyInt_FromSsize_t(result);
   1781 }
   1782 
   1783 
   1784 PyDoc_STRVAR(rindex__doc__,
   1785 "S.rindex(sub [,start [,end]]) -> int\n\
   1786 \n\
   1787 Like S.rfind() but raise ValueError when the substring is not found.");
   1788 
   1789 static PyObject *
   1790 string_rindex(PyStringObject *self, PyObject *args)
   1791 {
   1792     Py_ssize_t result = string_find_internal(self, args, -1);
   1793     if (result == -2)
   1794         return NULL;
   1795     if (result == -1) {
   1796         PyErr_SetString(PyExc_ValueError,
   1797                         "substring not found");
   1798         return NULL;
   1799     }
   1800     return PyInt_FromSsize_t(result);
   1801 }
   1802 
   1803 
   1804 Py_LOCAL_INLINE(PyObject *)
   1805 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
   1806 {
   1807     char *s = PyString_AS_STRING(self);
   1808     Py_ssize_t len = PyString_GET_SIZE(self);
   1809     char *sep = PyString_AS_STRING(sepobj);
   1810     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
   1811     Py_ssize_t i, j;
   1812 
   1813     i = 0;
   1814     if (striptype != RIGHTSTRIP) {
   1815         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
   1816             i++;
   1817         }
   1818     }
   1819 
   1820     j = len;
   1821     if (striptype != LEFTSTRIP) {
   1822         do {
   1823             j--;
   1824         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
   1825         j++;
   1826     }
   1827 
   1828     if (i == 0 && j == len && PyString_CheckExact(self)) {
   1829         Py_INCREF(self);
   1830         return (PyObject*)self;
   1831     }
   1832     else
   1833         return PyString_FromStringAndSize(s+i, j-i);
   1834 }
   1835 
   1836 
   1837 Py_LOCAL_INLINE(PyObject *)
   1838 do_strip(PyStringObject *self, int striptype)
   1839 {
   1840     char *s = PyString_AS_STRING(self);
   1841     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
   1842 
   1843     i = 0;
   1844     if (striptype != RIGHTSTRIP) {
   1845         while (i < len && isspace(Py_CHARMASK(s[i]))) {
   1846             i++;
   1847         }
   1848     }
   1849 
   1850     j = len;
   1851     if (striptype != LEFTSTRIP) {
   1852         do {
   1853             j--;
   1854         } while (j >= i && isspace(Py_CHARMASK(s[j])));
   1855         j++;
   1856     }
   1857 
   1858     if (i == 0 && j == len && PyString_CheckExact(self)) {
   1859         Py_INCREF(self);
   1860         return (PyObject*)self;
   1861     }
   1862     else
   1863         return PyString_FromStringAndSize(s+i, j-i);
   1864 }
   1865 
   1866 
   1867 Py_LOCAL_INLINE(PyObject *)
   1868 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
   1869 {
   1870     PyObject *sep = NULL;
   1871 
   1872     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
   1873         return NULL;
   1874 
   1875     if (sep != NULL && sep != Py_None) {
   1876         if (PyString_Check(sep))
   1877             return do_xstrip(self, striptype, sep);
   1878 #ifdef Py_USING_UNICODE
   1879         else if (PyUnicode_Check(sep)) {
   1880             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
   1881             PyObject *res;
   1882             if (uniself==NULL)
   1883                 return NULL;
   1884             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
   1885                 striptype, sep);
   1886             Py_DECREF(uniself);
   1887             return res;
   1888         }
   1889 #endif
   1890         PyErr_Format(PyExc_TypeError,
   1891 #ifdef Py_USING_UNICODE
   1892                      "%s arg must be None, str or unicode",
   1893 #else
   1894                      "%s arg must be None or str",
   1895 #endif
   1896                      STRIPNAME(striptype));
   1897         return NULL;
   1898     }
   1899 
   1900     return do_strip(self, striptype);
   1901 }
   1902 
   1903 
   1904 PyDoc_STRVAR(strip__doc__,
   1905 "S.strip([chars]) -> string or unicode\n\
   1906 \n\
   1907 Return a copy of the string S with leading and trailing\n\
   1908 whitespace removed.\n\
   1909 If chars is given and not None, remove characters in chars instead.\n\
   1910 If chars is unicode, S will be converted to unicode before stripping");
   1911 
   1912 static PyObject *
   1913 string_strip(PyStringObject *self, PyObject *args)
   1914 {
   1915     if (PyTuple_GET_SIZE(args) == 0)
   1916         return do_strip(self, BOTHSTRIP); /* Common case */
   1917     else
   1918         return do_argstrip(self, BOTHSTRIP, args);
   1919 }
   1920 
   1921 
   1922 PyDoc_STRVAR(lstrip__doc__,
   1923 "S.lstrip([chars]) -> string or unicode\n\
   1924 \n\
   1925 Return a copy of the string S with leading whitespace removed.\n\
   1926 If chars is given and not None, remove characters in chars instead.\n\
   1927 If chars is unicode, S will be converted to unicode before stripping");
   1928 
   1929 static PyObject *
   1930 string_lstrip(PyStringObject *self, PyObject *args)
   1931 {
   1932     if (PyTuple_GET_SIZE(args) == 0)
   1933         return do_strip(self, LEFTSTRIP); /* Common case */
   1934     else
   1935         return do_argstrip(self, LEFTSTRIP, args);
   1936 }
   1937 
   1938 
   1939 PyDoc_STRVAR(rstrip__doc__,
   1940 "S.rstrip([chars]) -> string or unicode\n\
   1941 \n\
   1942 Return a copy of the string S with trailing whitespace removed.\n\
   1943 If chars is given and not None, remove characters in chars instead.\n\
   1944 If chars is unicode, S will be converted to unicode before stripping");
   1945 
   1946 static PyObject *
   1947 string_rstrip(PyStringObject *self, PyObject *args)
   1948 {
   1949     if (PyTuple_GET_SIZE(args) == 0)
   1950         return do_strip(self, RIGHTSTRIP); /* Common case */
   1951     else
   1952         return do_argstrip(self, RIGHTSTRIP, args);
   1953 }
   1954 
   1955 
   1956 PyDoc_STRVAR(lower__doc__,
   1957 "S.lower() -> string\n\
   1958 \n\
   1959 Return a copy of the string S converted to lowercase.");
   1960 
   1961 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
   1962 #ifndef _tolower
   1963 #define _tolower tolower
   1964 #endif
   1965 
   1966 static PyObject *
   1967 string_lower(PyStringObject *self)
   1968 {
   1969     char *s;
   1970     Py_ssize_t i, n = PyString_GET_SIZE(self);
   1971     PyObject *newobj;
   1972 
   1973     newobj = PyString_FromStringAndSize(NULL, n);
   1974     if (!newobj)
   1975         return NULL;
   1976 
   1977     s = PyString_AS_STRING(newobj);
   1978 
   1979     Py_MEMCPY(s, PyString_AS_STRING(self), n);
   1980 
   1981     for (i = 0; i < n; i++) {
   1982         int c = Py_CHARMASK(s[i]);
   1983         if (isupper(c))
   1984             s[i] = _tolower(c);
   1985     }
   1986 
   1987     return newobj;
   1988 }
   1989 
   1990 PyDoc_STRVAR(upper__doc__,
   1991 "S.upper() -> string\n\
   1992 \n\
   1993 Return a copy of the string S converted to uppercase.");
   1994 
   1995 #ifndef _toupper
   1996 #define _toupper toupper
   1997 #endif
   1998 
   1999 static PyObject *
   2000 string_upper(PyStringObject *self)
   2001 {
   2002     char *s;
   2003     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2004     PyObject *newobj;
   2005 
   2006     newobj = PyString_FromStringAndSize(NULL, n);
   2007     if (!newobj)
   2008         return NULL;
   2009 
   2010     s = PyString_AS_STRING(newobj);
   2011 
   2012     Py_MEMCPY(s, PyString_AS_STRING(self), n);
   2013 
   2014     for (i = 0; i < n; i++) {
   2015         int c = Py_CHARMASK(s[i]);
   2016         if (islower(c))
   2017             s[i] = _toupper(c);
   2018     }
   2019 
   2020     return newobj;
   2021 }
   2022 
   2023 PyDoc_STRVAR(title__doc__,
   2024 "S.title() -> string\n\
   2025 \n\
   2026 Return a titlecased version of S, i.e. words start with uppercase\n\
   2027 characters, all remaining cased characters have lowercase.");
   2028 
   2029 static PyObject*
   2030 string_title(PyStringObject *self)
   2031 {
   2032     char *s = PyString_AS_STRING(self), *s_new;
   2033     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2034     int previous_is_cased = 0;
   2035     PyObject *newobj;
   2036 
   2037     newobj = PyString_FromStringAndSize(NULL, n);
   2038     if (newobj == NULL)
   2039         return NULL;
   2040     s_new = PyString_AsString(newobj);
   2041     for (i = 0; i < n; i++) {
   2042         int c = Py_CHARMASK(*s++);
   2043         if (islower(c)) {
   2044             if (!previous_is_cased)
   2045                 c = toupper(c);
   2046             previous_is_cased = 1;
   2047         } else if (isupper(c)) {
   2048             if (previous_is_cased)
   2049                 c = tolower(c);
   2050             previous_is_cased = 1;
   2051         } else
   2052             previous_is_cased = 0;
   2053         *s_new++ = c;
   2054     }
   2055     return newobj;
   2056 }
   2057 
   2058 PyDoc_STRVAR(capitalize__doc__,
   2059 "S.capitalize() -> string\n\
   2060 \n\
   2061 Return a copy of the string S with only its first character\n\
   2062 capitalized.");
   2063 
   2064 static PyObject *
   2065 string_capitalize(PyStringObject *self)
   2066 {
   2067     char *s = PyString_AS_STRING(self), *s_new;
   2068     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2069     PyObject *newobj;
   2070 
   2071     newobj = PyString_FromStringAndSize(NULL, n);
   2072     if (newobj == NULL)
   2073         return NULL;
   2074     s_new = PyString_AsString(newobj);
   2075     if (0 < n) {
   2076         int c = Py_CHARMASK(*s++);
   2077         if (islower(c))
   2078             *s_new = toupper(c);
   2079         else
   2080             *s_new = c;
   2081         s_new++;
   2082     }
   2083     for (i = 1; i < n; i++) {
   2084         int c = Py_CHARMASK(*s++);
   2085         if (isupper(c))
   2086             *s_new = tolower(c);
   2087         else
   2088             *s_new = c;
   2089         s_new++;
   2090     }
   2091     return newobj;
   2092 }
   2093 
   2094 
   2095 PyDoc_STRVAR(count__doc__,
   2096 "S.count(sub[, start[, end]]) -> int\n\
   2097 \n\
   2098 Return the number of non-overlapping occurrences of substring sub in\n\
   2099 string S[start:end].  Optional arguments start and end are interpreted\n\
   2100 as in slice notation.");
   2101 
   2102 static PyObject *
   2103 string_count(PyStringObject *self, PyObject *args)
   2104 {
   2105     PyObject *sub_obj;
   2106     const char *str = PyString_AS_STRING(self), *sub;
   2107     Py_ssize_t sub_len;
   2108     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
   2109 
   2110     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
   2111         return NULL;
   2112 
   2113     if (PyString_Check(sub_obj)) {
   2114         sub = PyString_AS_STRING(sub_obj);
   2115         sub_len = PyString_GET_SIZE(sub_obj);
   2116     }
   2117 #ifdef Py_USING_UNICODE
   2118     else if (PyUnicode_Check(sub_obj)) {
   2119         Py_ssize_t count;
   2120         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
   2121         if (count == -1)
   2122             return NULL;
   2123         else
   2124             return PyInt_FromSsize_t(count);
   2125     }
   2126 #endif
   2127     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
   2128         return NULL;
   2129 
   2130     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
   2131 
   2132     return PyInt_FromSsize_t(
   2133         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
   2134         );
   2135 }
   2136 
   2137 PyDoc_STRVAR(swapcase__doc__,
   2138 "S.swapcase() -> string\n\
   2139 \n\
   2140 Return a copy of the string S with uppercase characters\n\
   2141 converted to lowercase and vice versa.");
   2142 
   2143 static PyObject *
   2144 string_swapcase(PyStringObject *self)
   2145 {
   2146     char *s = PyString_AS_STRING(self), *s_new;
   2147     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2148     PyObject *newobj;
   2149 
   2150     newobj = PyString_FromStringAndSize(NULL, n);
   2151     if (newobj == NULL)
   2152         return NULL;
   2153     s_new = PyString_AsString(newobj);
   2154     for (i = 0; i < n; i++) {
   2155         int c = Py_CHARMASK(*s++);
   2156         if (islower(c)) {
   2157             *s_new = toupper(c);
   2158         }
   2159         else if (isupper(c)) {
   2160             *s_new = tolower(c);
   2161         }
   2162         else
   2163             *s_new = c;
   2164         s_new++;
   2165     }
   2166     return newobj;
   2167 }
   2168 
   2169 
   2170 PyDoc_STRVAR(translate__doc__,
   2171 "S.translate(table [,deletechars]) -> string\n\
   2172 \n\
   2173 Return a copy of the string S, where all characters occurring\n\
   2174 in the optional argument deletechars are removed, and the\n\
   2175 remaining characters have been mapped through the given\n\
   2176 translation table, which must be a string of length 256.");
   2177 
   2178 static PyObject *
   2179 string_translate(PyStringObject *self, PyObject *args)
   2180 {
   2181     register char *input, *output;
   2182     const char *table;
   2183     register Py_ssize_t i, c, changed = 0;
   2184     PyObject *input_obj = (PyObject*)self;
   2185     const char *output_start, *del_table=NULL;
   2186     Py_ssize_t inlen, tablen, dellen = 0;
   2187     PyObject *result;
   2188     int trans_table[256];
   2189     PyObject *tableobj, *delobj = NULL;
   2190 
   2191     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
   2192                           &tableobj, &delobj))
   2193         return NULL;
   2194 
   2195     if (PyString_Check(tableobj)) {
   2196         table = PyString_AS_STRING(tableobj);
   2197         tablen = PyString_GET_SIZE(tableobj);
   2198     }
   2199     else if (tableobj == Py_None) {
   2200         table = NULL;
   2201         tablen = 256;
   2202     }
   2203 #ifdef Py_USING_UNICODE
   2204     else if (PyUnicode_Check(tableobj)) {
   2205         /* Unicode .translate() does not support the deletechars
   2206            parameter; instead a mapping to None will cause characters
   2207            to be deleted. */
   2208         if (delobj != NULL) {
   2209             PyErr_SetString(PyExc_TypeError,
   2210             "deletions are implemented differently for unicode");
   2211             return NULL;
   2212         }
   2213         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
   2214     }
   2215 #endif
   2216     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
   2217         return NULL;
   2218 
   2219     if (tablen != 256) {
   2220         PyErr_SetString(PyExc_ValueError,
   2221           "translation table must be 256 characters long");
   2222         return NULL;
   2223     }
   2224 
   2225     if (delobj != NULL) {
   2226         if (PyString_Check(delobj)) {
   2227             del_table = PyString_AS_STRING(delobj);
   2228             dellen = PyString_GET_SIZE(delobj);
   2229         }
   2230 #ifdef Py_USING_UNICODE
   2231         else if (PyUnicode_Check(delobj)) {
   2232             PyErr_SetString(PyExc_TypeError,
   2233             "deletions are implemented differently for unicode");
   2234             return NULL;
   2235         }
   2236 #endif
   2237         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
   2238             return NULL;
   2239     }
   2240     else {
   2241         del_table = NULL;
   2242         dellen = 0;
   2243     }
   2244 
   2245     inlen = PyString_GET_SIZE(input_obj);
   2246     result = PyString_FromStringAndSize((char *)NULL, inlen);
   2247     if (result == NULL)
   2248         return NULL;
   2249     output_start = output = PyString_AsString(result);
   2250     input = PyString_AS_STRING(input_obj);
   2251 
   2252     if (dellen == 0 && table != NULL) {
   2253         /* If no deletions are required, use faster code */
   2254         for (i = inlen; --i >= 0; ) {
   2255             c = Py_CHARMASK(*input++);
   2256             if (Py_CHARMASK((*output++ = table[c])) != c)
   2257                 changed = 1;
   2258         }
   2259         if (changed || !PyString_CheckExact(input_obj))
   2260             return result;
   2261         Py_DECREF(result);
   2262         Py_INCREF(input_obj);
   2263         return input_obj;
   2264     }
   2265 
   2266     if (table == NULL) {
   2267         for (i = 0; i < 256; i++)
   2268             trans_table[i] = Py_CHARMASK(i);
   2269     } else {
   2270         for (i = 0; i < 256; i++)
   2271             trans_table[i] = Py_CHARMASK(table[i]);
   2272     }
   2273 
   2274     for (i = 0; i < dellen; i++)
   2275         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
   2276 
   2277     for (i = inlen; --i >= 0; ) {
   2278         c = Py_CHARMASK(*input++);
   2279         if (trans_table[c] != -1)
   2280             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
   2281                 continue;
   2282         changed = 1;
   2283     }
   2284     if (!changed && PyString_CheckExact(input_obj)) {
   2285         Py_DECREF(result);
   2286         Py_INCREF(input_obj);
   2287         return input_obj;
   2288     }
   2289     /* Fix the size of the resulting string */
   2290     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
   2291         return NULL;
   2292     return result;
   2293 }
   2294 
   2295 
   2296 /* find and count characters and substrings */
   2297 
   2298 #define findchar(target, target_len, c)                         \
   2299   ((char *)memchr((const void *)(target), c, target_len))
   2300 
   2301 /* String ops must return a string.  */
   2302 /* If the object is subclass of string, create a copy */
   2303 Py_LOCAL(PyStringObject *)
   2304 return_self(PyStringObject *self)
   2305 {
   2306     if (PyString_CheckExact(self)) {
   2307         Py_INCREF(self);
   2308         return self;
   2309     }
   2310     return (PyStringObject *)PyString_FromStringAndSize(
   2311         PyString_AS_STRING(self),
   2312         PyString_GET_SIZE(self));
   2313 }
   2314 
   2315 Py_LOCAL_INLINE(Py_ssize_t)
   2316 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
   2317 {
   2318     Py_ssize_t count=0;
   2319     const char *start=target;
   2320     const char *end=target+target_len;
   2321 
   2322     while ( (start=findchar(start, end-start, c)) != NULL ) {
   2323         count++;
   2324         if (count >= maxcount)
   2325             break;
   2326         start += 1;
   2327     }
   2328     return count;
   2329 }
   2330 
   2331 
   2332 /* Algorithms for different cases of string replacement */
   2333 
   2334 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
   2335 Py_LOCAL(PyStringObject *)
   2336 replace_interleave(PyStringObject *self,
   2337                    const char *to_s, Py_ssize_t to_len,
   2338                    Py_ssize_t maxcount)
   2339 {
   2340     char *self_s, *result_s;
   2341     Py_ssize_t self_len, result_len;
   2342     Py_ssize_t count, i, product;
   2343     PyStringObject *result;
   2344 
   2345     self_len = PyString_GET_SIZE(self);
   2346 
   2347     /* 1 at the end plus 1 after every character */
   2348     count = self_len+1;
   2349     if (maxcount < count)
   2350         count = maxcount;
   2351 
   2352     /* Check for overflow */
   2353     /*   result_len = count * to_len + self_len; */
   2354     product = count * to_len;
   2355     if (product / to_len != count) {
   2356         PyErr_SetString(PyExc_OverflowError,
   2357                         "replace string is too long");
   2358         return NULL;
   2359     }
   2360     result_len = product + self_len;
   2361     if (result_len < 0) {
   2362         PyErr_SetString(PyExc_OverflowError,
   2363                         "replace string is too long");
   2364         return NULL;
   2365     }
   2366 
   2367     if (! (result = (PyStringObject *)
   2368                      PyString_FromStringAndSize(NULL, result_len)) )
   2369         return NULL;
   2370 
   2371     self_s = PyString_AS_STRING(self);
   2372     result_s = PyString_AS_STRING(result);
   2373 
   2374     /* TODO: special case single character, which doesn't need memcpy */
   2375 
   2376     /* Lay the first one down (guaranteed this will occur) */
   2377     Py_MEMCPY(result_s, to_s, to_len);
   2378     result_s += to_len;
   2379     count -= 1;
   2380 
   2381     for (i=0; i<count; i++) {
   2382         *result_s++ = *self_s++;
   2383         Py_MEMCPY(result_s, to_s, to_len);
   2384         result_s += to_len;
   2385     }
   2386 
   2387     /* Copy the rest of the original string */
   2388     Py_MEMCPY(result_s, self_s, self_len-i);
   2389 
   2390     return result;
   2391 }
   2392 
   2393 /* Special case for deleting a single character */
   2394 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
   2395 Py_LOCAL(PyStringObject *)
   2396 replace_delete_single_character(PyStringObject *self,
   2397                                 char from_c, Py_ssize_t maxcount)
   2398 {
   2399     char *self_s, *result_s;
   2400     char *start, *next, *end;
   2401     Py_ssize_t self_len, result_len;
   2402     Py_ssize_t count;
   2403     PyStringObject *result;
   2404 
   2405     self_len = PyString_GET_SIZE(self);
   2406     self_s = PyString_AS_STRING(self);
   2407 
   2408     count = countchar(self_s, self_len, from_c, maxcount);
   2409     if (count == 0) {
   2410         return return_self(self);
   2411     }
   2412 
   2413     result_len = self_len - count;  /* from_len == 1 */
   2414     assert(result_len>=0);
   2415 
   2416     if ( (result = (PyStringObject *)
   2417                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
   2418         return NULL;
   2419     result_s = PyString_AS_STRING(result);
   2420 
   2421     start = self_s;
   2422     end = self_s + self_len;
   2423     while (count-- > 0) {
   2424         next = findchar(start, end-start, from_c);
   2425         if (next == NULL)
   2426             break;
   2427         Py_MEMCPY(result_s, start, next-start);
   2428         result_s += (next-start);
   2429         start = next+1;
   2430     }
   2431     Py_MEMCPY(result_s, start, end-start);
   2432 
   2433     return result;
   2434 }
   2435 
   2436 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
   2437 
   2438 Py_LOCAL(PyStringObject *)
   2439 replace_delete_substring(PyStringObject *self,
   2440                          const char *from_s, Py_ssize_t from_len,
   2441                          Py_ssize_t maxcount) {
   2442     char *self_s, *result_s;
   2443     char *start, *next, *end;
   2444     Py_ssize_t self_len, result_len;
   2445     Py_ssize_t count, offset;
   2446     PyStringObject *result;
   2447 
   2448     self_len = PyString_GET_SIZE(self);
   2449     self_s = PyString_AS_STRING(self);
   2450 
   2451     count = stringlib_count(self_s, self_len,
   2452                             from_s, from_len,
   2453                             maxcount);
   2454 
   2455     if (count == 0) {
   2456         /* no matches */
   2457         return return_self(self);
   2458     }
   2459 
   2460     result_len = self_len - (count * from_len);
   2461     assert (result_len>=0);
   2462 
   2463     if ( (result = (PyStringObject *)
   2464           PyString_FromStringAndSize(NULL, result_len)) == NULL )
   2465         return NULL;
   2466 
   2467     result_s = PyString_AS_STRING(result);
   2468 
   2469     start = self_s;
   2470     end = self_s + self_len;
   2471     while (count-- > 0) {
   2472         offset = stringlib_find(start, end-start,
   2473                                 from_s, from_len,
   2474                                 0);
   2475         if (offset == -1)
   2476             break;
   2477         next = start + offset;
   2478 
   2479         Py_MEMCPY(result_s, start, next-start);
   2480 
   2481         result_s += (next-start);
   2482         start = next+from_len;
   2483     }
   2484     Py_MEMCPY(result_s, start, end-start);
   2485     return result;
   2486 }
   2487 
   2488 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
   2489 Py_LOCAL(PyStringObject *)
   2490 replace_single_character_in_place(PyStringObject *self,
   2491                                   char from_c, char to_c,
   2492                                   Py_ssize_t maxcount)
   2493 {
   2494     char *self_s, *result_s, *start, *end, *next;
   2495     Py_ssize_t self_len;
   2496     PyStringObject *result;
   2497 
   2498     /* The result string will be the same size */
   2499     self_s = PyString_AS_STRING(self);
   2500     self_len = PyString_GET_SIZE(self);
   2501 
   2502     next = findchar(self_s, self_len, from_c);
   2503 
   2504     if (next == NULL) {
   2505         /* No matches; return the original string */
   2506         return return_self(self);
   2507     }
   2508 
   2509     /* Need to make a new string */
   2510     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
   2511     if (result == NULL)
   2512         return NULL;
   2513     result_s = PyString_AS_STRING(result);
   2514     Py_MEMCPY(result_s, self_s, self_len);
   2515 
   2516     /* change everything in-place, starting with this one */
   2517     start =  result_s + (next-self_s);
   2518     *start = to_c;
   2519     start++;
   2520     end = result_s + self_len;
   2521 
   2522     while (--maxcount > 0) {
   2523         next = findchar(start, end-start, from_c);
   2524         if (next == NULL)
   2525             break;
   2526         *next = to_c;
   2527         start = next+1;
   2528     }
   2529 
   2530     return result;
   2531 }
   2532 
   2533 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
   2534 Py_LOCAL(PyStringObject *)
   2535 replace_substring_in_place(PyStringObject *self,
   2536                            const char *from_s, Py_ssize_t from_len,
   2537                            const char *to_s, Py_ssize_t to_len,
   2538                            Py_ssize_t maxcount)
   2539 {
   2540     char *result_s, *start, *end;
   2541     char *self_s;
   2542     Py_ssize_t self_len, offset;
   2543     PyStringObject *result;
   2544 
   2545     /* The result string will be the same size */
   2546 
   2547     self_s = PyString_AS_STRING(self);
   2548     self_len = PyString_GET_SIZE(self);
   2549 
   2550     offset = stringlib_find(self_s, self_len,
   2551                             from_s, from_len,
   2552                             0);
   2553     if (offset == -1) {
   2554         /* No matches; return the original string */
   2555         return return_self(self);
   2556     }
   2557 
   2558     /* Need to make a new string */
   2559     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
   2560     if (result == NULL)
   2561         return NULL;
   2562     result_s = PyString_AS_STRING(result);
   2563     Py_MEMCPY(result_s, self_s, self_len);
   2564 
   2565     /* change everything in-place, starting with this one */
   2566     start =  result_s + offset;
   2567     Py_MEMCPY(start, to_s, from_len);
   2568     start += from_len;
   2569     end = result_s + self_len;
   2570 
   2571     while ( --maxcount > 0) {
   2572         offset = stringlib_find(start, end-start,
   2573                                 from_s, from_len,
   2574                                 0);
   2575         if (offset==-1)
   2576             break;
   2577         Py_MEMCPY(start+offset, to_s, from_len);
   2578         start += offset+from_len;
   2579     }
   2580 
   2581     return result;
   2582 }
   2583 
   2584 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
   2585 Py_LOCAL(PyStringObject *)
   2586 replace_single_character(PyStringObject *self,
   2587                          char from_c,
   2588                          const char *to_s, Py_ssize_t to_len,
   2589                          Py_ssize_t maxcount)
   2590 {
   2591     char *self_s, *result_s;
   2592     char *start, *next, *end;
   2593     Py_ssize_t self_len, result_len;
   2594     Py_ssize_t count, product;
   2595     PyStringObject *result;
   2596 
   2597     self_s = PyString_AS_STRING(self);
   2598     self_len = PyString_GET_SIZE(self);
   2599 
   2600     count = countchar(self_s, self_len, from_c, maxcount);
   2601     if (count == 0) {
   2602         /* no matches, return unchanged */
   2603         return return_self(self);
   2604     }
   2605 
   2606     /* use the difference between current and new, hence the "-1" */
   2607     /*   result_len = self_len + count * (to_len-1)  */
   2608     product = count * (to_len-1);
   2609     if (product / (to_len-1) != count) {
   2610         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2611         return NULL;
   2612     }
   2613     result_len = self_len + product;
   2614     if (result_len < 0) {
   2615         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2616         return NULL;
   2617     }
   2618 
   2619     if ( (result = (PyStringObject *)
   2620           PyString_FromStringAndSize(NULL, result_len)) == NULL)
   2621         return NULL;
   2622     result_s = PyString_AS_STRING(result);
   2623 
   2624     start = self_s;
   2625     end = self_s + self_len;
   2626     while (count-- > 0) {
   2627         next = findchar(start, end-start, from_c);
   2628         if (next == NULL)
   2629             break;
   2630 
   2631         if (next == start) {
   2632             /* replace with the 'to' */
   2633             Py_MEMCPY(result_s, to_s, to_len);
   2634             result_s += to_len;
   2635             start += 1;
   2636         } else {
   2637             /* copy the unchanged old then the 'to' */
   2638             Py_MEMCPY(result_s, start, next-start);
   2639             result_s += (next-start);
   2640             Py_MEMCPY(result_s, to_s, to_len);
   2641             result_s += to_len;
   2642             start = next+1;
   2643         }
   2644     }
   2645     /* Copy the remainder of the remaining string */
   2646     Py_MEMCPY(result_s, start, end-start);
   2647 
   2648     return result;
   2649 }
   2650 
   2651 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
   2652 Py_LOCAL(PyStringObject *)
   2653 replace_substring(PyStringObject *self,
   2654                   const char *from_s, Py_ssize_t from_len,
   2655                   const char *to_s, Py_ssize_t to_len,
   2656                   Py_ssize_t maxcount) {
   2657     char *self_s, *result_s;
   2658     char *start, *next, *end;
   2659     Py_ssize_t self_len, result_len;
   2660     Py_ssize_t count, offset, product;
   2661     PyStringObject *result;
   2662 
   2663     self_s = PyString_AS_STRING(self);
   2664     self_len = PyString_GET_SIZE(self);
   2665 
   2666     count = stringlib_count(self_s, self_len,
   2667                             from_s, from_len,
   2668                             maxcount);
   2669 
   2670     if (count == 0) {
   2671         /* no matches, return unchanged */
   2672         return return_self(self);
   2673     }
   2674 
   2675     /* Check for overflow */
   2676     /*    result_len = self_len + count * (to_len-from_len) */
   2677     product = count * (to_len-from_len);
   2678     if (product / (to_len-from_len) != count) {
   2679         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2680         return NULL;
   2681     }
   2682     result_len = self_len + product;
   2683     if (result_len < 0) {
   2684         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2685         return NULL;
   2686     }
   2687 
   2688     if ( (result = (PyStringObject *)
   2689           PyString_FromStringAndSize(NULL, result_len)) == NULL)
   2690         return NULL;
   2691     result_s = PyString_AS_STRING(result);
   2692 
   2693     start = self_s;
   2694     end = self_s + self_len;
   2695     while (count-- > 0) {
   2696         offset = stringlib_find(start, end-start,
   2697                                 from_s, from_len,
   2698                                 0);
   2699         if (offset == -1)
   2700             break;
   2701         next = start+offset;
   2702         if (next == start) {
   2703             /* replace with the 'to' */
   2704             Py_MEMCPY(result_s, to_s, to_len);
   2705             result_s += to_len;
   2706             start += from_len;
   2707         } else {
   2708             /* copy the unchanged old then the 'to' */
   2709             Py_MEMCPY(result_s, start, next-start);
   2710             result_s += (next-start);
   2711             Py_MEMCPY(result_s, to_s, to_len);
   2712             result_s += to_len;
   2713             start = next+from_len;
   2714         }
   2715     }
   2716     /* Copy the remainder of the remaining string */
   2717     Py_MEMCPY(result_s, start, end-start);
   2718 
   2719     return result;
   2720 }
   2721 
   2722 
   2723 Py_LOCAL(PyStringObject *)
   2724 replace(PyStringObject *self,
   2725     const char *from_s, Py_ssize_t from_len,
   2726     const char *to_s, Py_ssize_t to_len,
   2727     Py_ssize_t maxcount)
   2728 {
   2729     if (maxcount < 0) {
   2730         maxcount = PY_SSIZE_T_MAX;
   2731     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
   2732         /* nothing to do; return the original string */
   2733         return return_self(self);
   2734     }
   2735 
   2736     if (maxcount == 0 ||
   2737         (from_len == 0 && to_len == 0)) {
   2738         /* nothing to do; return the original string */
   2739         return return_self(self);
   2740     }
   2741 
   2742     /* Handle zero-length special cases */
   2743 
   2744     if (from_len == 0) {
   2745         /* insert the 'to' string everywhere.   */
   2746         /*    >>> "Python".replace("", ".")     */
   2747         /*    '.P.y.t.h.o.n.'                   */
   2748         return replace_interleave(self, to_s, to_len, maxcount);
   2749     }
   2750 
   2751     /* Except for "".replace("", "A") == "A" there is no way beyond this */
   2752     /* point for an empty self string to generate a non-empty string */
   2753     /* Special case so the remaining code always gets a non-empty string */
   2754     if (PyString_GET_SIZE(self) == 0) {
   2755         return return_self(self);
   2756     }
   2757 
   2758     if (to_len == 0) {
   2759         /* delete all occurances of 'from' string */
   2760         if (from_len == 1) {
   2761             return replace_delete_single_character(
   2762                 self, from_s[0], maxcount);
   2763         } else {
   2764             return replace_delete_substring(self, from_s, from_len, maxcount);
   2765         }
   2766     }
   2767 
   2768     /* Handle special case where both strings have the same length */
   2769 
   2770     if (from_len == to_len) {
   2771         if (from_len == 1) {
   2772             return replace_single_character_in_place(
   2773                 self,
   2774                 from_s[0],
   2775                 to_s[0],
   2776                 maxcount);
   2777         } else {
   2778             return replace_substring_in_place(
   2779                 self, from_s, from_len, to_s, to_len, maxcount);
   2780         }
   2781     }
   2782 
   2783     /* Otherwise use the more generic algorithms */
   2784     if (from_len == 1) {
   2785         return replace_single_character(self, from_s[0],
   2786                                         to_s, to_len, maxcount);
   2787     } else {
   2788         /* len('from')>=2, len('to')>=1 */
   2789         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
   2790     }
   2791 }
   2792 
   2793 PyDoc_STRVAR(replace__doc__,
   2794 "S.replace(old, new[, count]) -> string\n\
   2795 \n\
   2796 Return a copy of string S with all occurrences of substring\n\
   2797 old replaced by new.  If the optional argument count is\n\
   2798 given, only the first count occurrences are replaced.");
   2799 
   2800 static PyObject *
   2801 string_replace(PyStringObject *self, PyObject *args)
   2802 {
   2803     Py_ssize_t count = -1;
   2804     PyObject *from, *to;
   2805     const char *from_s, *to_s;
   2806     Py_ssize_t from_len, to_len;
   2807 
   2808     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
   2809         return NULL;
   2810 
   2811     if (PyString_Check(from)) {
   2812         from_s = PyString_AS_STRING(from);
   2813         from_len = PyString_GET_SIZE(from);
   2814     }
   2815 #ifdef Py_USING_UNICODE
   2816     if (PyUnicode_Check(from))
   2817         return PyUnicode_Replace((PyObject *)self,
   2818                                  from, to, count);
   2819 #endif
   2820     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
   2821         return NULL;
   2822 
   2823     if (PyString_Check(to)) {
   2824         to_s = PyString_AS_STRING(to);
   2825         to_len = PyString_GET_SIZE(to);
   2826     }
   2827 #ifdef Py_USING_UNICODE
   2828     else if (PyUnicode_Check(to))
   2829         return PyUnicode_Replace((PyObject *)self,
   2830                                  from, to, count);
   2831 #endif
   2832     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
   2833         return NULL;
   2834 
   2835     return (PyObject *)replace((PyStringObject *) self,
   2836                                from_s, from_len,
   2837                                to_s, to_len, count);
   2838 }
   2839 
   2840 /** End DALKE **/
   2841 
   2842 /* Matches the end (direction >= 0) or start (direction < 0) of self
   2843  * against substr, using the start and end arguments. Returns
   2844  * -1 on error, 0 if not found and 1 if found.
   2845  */
   2846 Py_LOCAL(int)
   2847 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
   2848                   Py_ssize_t end, int direction)
   2849 {
   2850     Py_ssize_t len = PyString_GET_SIZE(self);
   2851     Py_ssize_t slen;
   2852     const char* sub;
   2853     const char* str;
   2854 
   2855     if (PyString_Check(substr)) {
   2856         sub = PyString_AS_STRING(substr);
   2857         slen = PyString_GET_SIZE(substr);
   2858     }
   2859 #ifdef Py_USING_UNICODE
   2860     else if (PyUnicode_Check(substr))
   2861         return PyUnicode_Tailmatch((PyObject *)self,
   2862                                    substr, start, end, direction);
   2863 #endif
   2864     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
   2865         return -1;
   2866     str = PyString_AS_STRING(self);
   2867 
   2868     ADJUST_INDICES(start, end, len);
   2869 
   2870     if (direction < 0) {
   2871         /* startswith */
   2872         if (start+slen > len)
   2873             return 0;
   2874     } else {
   2875         /* endswith */
   2876         if (end-start < slen || start > len)
   2877             return 0;
   2878 
   2879         if (end-slen > start)
   2880             start = end - slen;
   2881     }
   2882     if (end-start >= slen)
   2883         return ! memcmp(str+start, sub, slen);
   2884     return 0;
   2885 }
   2886 
   2887 
   2888 PyDoc_STRVAR(startswith__doc__,
   2889 "S.startswith(prefix[, start[, end]]) -> bool\n\
   2890 \n\
   2891 Return True if S starts with the specified prefix, False otherwise.\n\
   2892 With optional start, test S beginning at that position.\n\
   2893 With optional end, stop comparing S at that position.\n\
   2894 prefix can also be a tuple of strings to try.");
   2895 
   2896 static PyObject *
   2897 string_startswith(PyStringObject *self, PyObject *args)
   2898 {
   2899     Py_ssize_t start = 0;
   2900     Py_ssize_t end = PY_SSIZE_T_MAX;
   2901     PyObject *subobj;
   2902     int result;
   2903 
   2904     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
   2905         return NULL;
   2906     if (PyTuple_Check(subobj)) {
   2907         Py_ssize_t i;
   2908         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
   2909             result = _string_tailmatch(self,
   2910                             PyTuple_GET_ITEM(subobj, i),
   2911                             start, end, -1);
   2912             if (result == -1)
   2913                 return NULL;
   2914             else if (result) {
   2915                 Py_RETURN_TRUE;
   2916             }
   2917         }
   2918         Py_RETURN_FALSE;
   2919     }
   2920     result = _string_tailmatch(self, subobj, start, end, -1);
   2921     if (result == -1) {
   2922         if (PyErr_ExceptionMatches(PyExc_TypeError))
   2923             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
   2924                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
   2925         return NULL;
   2926     }
   2927     else
   2928         return PyBool_FromLong(result);
   2929 }
   2930 
   2931 
   2932 PyDoc_STRVAR(endswith__doc__,
   2933 "S.endswith(suffix[, start[, end]]) -> bool\n\
   2934 \n\
   2935 Return True if S ends with the specified suffix, False otherwise.\n\
   2936 With optional start, test S beginning at that position.\n\
   2937 With optional end, stop comparing S at that position.\n\
   2938 suffix can also be a tuple of strings to try.");
   2939 
   2940 static PyObject *
   2941 string_endswith(PyStringObject *self, PyObject *args)
   2942 {
   2943     Py_ssize_t start = 0;
   2944     Py_ssize_t end = PY_SSIZE_T_MAX;
   2945     PyObject *subobj;
   2946     int result;
   2947 
   2948     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
   2949         return NULL;
   2950     if (PyTuple_Check(subobj)) {
   2951         Py_ssize_t i;
   2952         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
   2953             result = _string_tailmatch(self,
   2954                             PyTuple_GET_ITEM(subobj, i),
   2955                             start, end, +1);
   2956             if (result == -1)
   2957                 return NULL;
   2958             else if (result) {
   2959                 Py_RETURN_TRUE;
   2960             }
   2961         }
   2962         Py_RETURN_FALSE;
   2963     }
   2964     result = _string_tailmatch(self, subobj, start, end, +1);
   2965     if (result == -1) {
   2966         if (PyErr_ExceptionMatches(PyExc_TypeError))
   2967             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
   2968                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
   2969         return NULL;
   2970     }
   2971     else
   2972         return PyBool_FromLong(result);
   2973 }
   2974 
   2975 
   2976 PyDoc_STRVAR(encode__doc__,
   2977 "S.encode([encoding[,errors]]) -> object\n\
   2978 \n\
   2979 Encodes S using the codec registered for encoding. encoding defaults\n\
   2980 to the default encoding. errors may be given to set a different error\n\
   2981 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
   2982 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
   2983 'xmlcharrefreplace' as well as any other name registered with\n\
   2984 codecs.register_error that is able to handle UnicodeEncodeErrors.");
   2985 
   2986 static PyObject *
   2987 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
   2988 {
   2989     static char *kwlist[] = {"encoding", "errors", 0};
   2990     char *encoding = NULL;
   2991     char *errors = NULL;
   2992     PyObject *v;
   2993 
   2994     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
   2995                                      kwlist, &encoding, &errors))
   2996         return NULL;
   2997     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
   2998     if (v == NULL)
   2999         goto onError;
   3000     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
   3001         PyErr_Format(PyExc_TypeError,
   3002                      "encoder did not return a string/unicode object "
   3003                      "(type=%.400s)",
   3004                      Py_TYPE(v)->tp_name);
   3005         Py_DECREF(v);
   3006         return NULL;
   3007     }
   3008     return v;
   3009 
   3010  onError:
   3011     return NULL;
   3012 }
   3013 
   3014 
   3015 PyDoc_STRVAR(decode__doc__,
   3016 "S.decode([encoding[,errors]]) -> object\n\
   3017 \n\
   3018 Decodes S using the codec registered for encoding. encoding defaults\n\
   3019 to the default encoding. errors may be given to set a different error\n\
   3020 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
   3021 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
   3022 as well as any other name registered with codecs.register_error that is\n\
   3023 able to handle UnicodeDecodeErrors.");
   3024 
   3025 static PyObject *
   3026 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
   3027 {
   3028     static char *kwlist[] = {"encoding", "errors", 0};
   3029     char *encoding = NULL;
   3030     char *errors = NULL;
   3031     PyObject *v;
   3032 
   3033     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
   3034                                      kwlist, &encoding, &errors))
   3035         return NULL;
   3036     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
   3037     if (v == NULL)
   3038         goto onError;
   3039     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
   3040         PyErr_Format(PyExc_TypeError,
   3041                      "decoder did not return a string/unicode object "
   3042                      "(type=%.400s)",
   3043                      Py_TYPE(v)->tp_name);
   3044         Py_DECREF(v);
   3045         return NULL;
   3046     }
   3047     return v;
   3048 
   3049  onError:
   3050     return NULL;
   3051 }
   3052 
   3053 
   3054 PyDoc_STRVAR(expandtabs__doc__,
   3055 "S.expandtabs([tabsize]) -> string\n\
   3056 \n\
   3057 Return a copy of S where all tab characters are expanded using spaces.\n\
   3058 If tabsize is not given, a tab size of 8 characters is assumed.");
   3059 
   3060 static PyObject*
   3061 string_expandtabs(PyStringObject *self, PyObject *args)
   3062 {
   3063     const char *e, *p, *qe;
   3064     char *q;
   3065     Py_ssize_t i, j, incr;
   3066     PyObject *u;
   3067     int tabsize = 8;
   3068 
   3069     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
   3070         return NULL;
   3071 
   3072     /* First pass: determine size of output string */
   3073     i = 0; /* chars up to and including most recent \n or \r */
   3074     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
   3075     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
   3076     for (p = PyString_AS_STRING(self); p < e; p++)
   3077     if (*p == '\t') {
   3078         if (tabsize > 0) {
   3079             incr = tabsize - (j % tabsize);
   3080             if (j > PY_SSIZE_T_MAX - incr)
   3081                 goto overflow1;
   3082             j += incr;
   3083         }
   3084     }
   3085     else {
   3086         if (j > PY_SSIZE_T_MAX - 1)
   3087             goto overflow1;
   3088         j++;
   3089         if (*p == '\n' || *p == '\r') {
   3090             if (i > PY_SSIZE_T_MAX - j)
   3091                 goto overflow1;
   3092             i += j;
   3093             j = 0;
   3094         }
   3095     }
   3096 
   3097     if (i > PY_SSIZE_T_MAX - j)
   3098         goto overflow1;
   3099 
   3100     /* Second pass: create output string and fill it */
   3101     u = PyString_FromStringAndSize(NULL, i + j);
   3102     if (!u)
   3103         return NULL;
   3104 
   3105     j = 0; /* same as in first pass */
   3106     q = PyString_AS_STRING(u); /* next output char */
   3107     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
   3108 
   3109     for (p = PyString_AS_STRING(self); p < e; p++)
   3110     if (*p == '\t') {
   3111         if (tabsize > 0) {
   3112             i = tabsize - (j % tabsize);
   3113             j += i;
   3114             while (i--) {
   3115                 if (q >= qe)
   3116                     goto overflow2;
   3117                 *q++ = ' ';
   3118             }
   3119         }
   3120     }
   3121     else {
   3122         if (q >= qe)
   3123             goto overflow2;
   3124         *q++ = *p;
   3125         j++;
   3126         if (*p == '\n' || *p == '\r')
   3127             j = 0;
   3128     }
   3129 
   3130     return u;
   3131 
   3132   overflow2:
   3133     Py_DECREF(u);
   3134   overflow1:
   3135     PyErr_SetString(PyExc_OverflowError, "new string is too long");
   3136     return NULL;
   3137 }
   3138 
   3139 Py_LOCAL_INLINE(PyObject *)
   3140 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
   3141 {
   3142     PyObject *u;
   3143 
   3144     if (left < 0)
   3145         left = 0;
   3146     if (right < 0)
   3147         right = 0;
   3148 
   3149     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
   3150         Py_INCREF(self);
   3151         return (PyObject *)self;
   3152     }
   3153 
   3154     u = PyString_FromStringAndSize(NULL,
   3155                                    left + PyString_GET_SIZE(self) + right);
   3156     if (u) {
   3157         if (left)
   3158             memset(PyString_AS_STRING(u), fill, left);
   3159         Py_MEMCPY(PyString_AS_STRING(u) + left,
   3160                PyString_AS_STRING(self),
   3161                PyString_GET_SIZE(self));
   3162         if (right)
   3163             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
   3164                fill, right);
   3165     }
   3166 
   3167     return u;
   3168 }
   3169 
   3170 PyDoc_STRVAR(ljust__doc__,
   3171 "S.ljust(width[, fillchar]) -> string\n"
   3172 "\n"
   3173 "Return S left-justified in a string of length width. Padding is\n"
   3174 "done using the specified fill character (default is a space).");
   3175 
   3176 static PyObject *
   3177 string_ljust(PyStringObject *self, PyObject *args)
   3178 {
   3179     Py_ssize_t width;
   3180     char fillchar = ' ';
   3181 
   3182     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
   3183         return NULL;
   3184 
   3185     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
   3186         Py_INCREF(self);
   3187         return (PyObject*) self;
   3188     }
   3189 
   3190     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
   3191 }
   3192 
   3193 
   3194 PyDoc_STRVAR(rjust__doc__,
   3195 "S.rjust(width[, fillchar]) -> string\n"
   3196 "\n"
   3197 "Return S right-justified in a string of length width. Padding is\n"
   3198 "done using the specified fill character (default is a space)");
   3199 
   3200 static PyObject *
   3201 string_rjust(PyStringObject *self, PyObject *args)
   3202 {
   3203     Py_ssize_t width;
   3204     char fillchar = ' ';
   3205 
   3206     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
   3207         return NULL;
   3208 
   3209     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
   3210         Py_INCREF(self);
   3211         return (PyObject*) self;
   3212     }
   3213 
   3214     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
   3215 }
   3216 
   3217 
   3218 PyDoc_STRVAR(center__doc__,
   3219 "S.center(width[, fillchar]) -> string\n"
   3220 "\n"
   3221 "Return S centered in a string of length width. Padding is\n"
   3222 "done using the specified fill character (default is a space)");
   3223 
   3224 static PyObject *
   3225 string_center(PyStringObject *self, PyObject *args)
   3226 {
   3227     Py_ssize_t marg, left;
   3228     Py_ssize_t width;
   3229     char fillchar = ' ';
   3230 
   3231     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
   3232         return NULL;
   3233 
   3234     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
   3235         Py_INCREF(self);
   3236         return (PyObject*) self;
   3237     }
   3238 
   3239     marg = width - PyString_GET_SIZE(self);
   3240     left = marg / 2 + (marg & width & 1);
   3241 
   3242     return pad(self, left, marg - left, fillchar);
   3243 }
   3244 
   3245 PyDoc_STRVAR(zfill__doc__,
   3246 "S.zfill(width) -> string\n"
   3247 "\n"
   3248 "Pad a numeric string S with zeros on the left, to fill a field\n"
   3249 "of the specified width.  The string S is never truncated.");
   3250 
   3251 static PyObject *
   3252 string_zfill(PyStringObject *self, PyObject *args)
   3253 {
   3254     Py_ssize_t fill;
   3255     PyObject *s;
   3256     char *p;
   3257     Py_ssize_t width;
   3258 
   3259     if (!PyArg_ParseTuple(args, "n:zfill", &width))
   3260         return NULL;
   3261 
   3262     if (PyString_GET_SIZE(self) >= width) {
   3263         if (PyString_CheckExact(self)) {
   3264             Py_INCREF(self);
   3265             return (PyObject*) self;
   3266         }
   3267         else
   3268             return PyString_FromStringAndSize(
   3269             PyString_AS_STRING(self),
   3270             PyString_GET_SIZE(self)
   3271             );
   3272     }
   3273 
   3274     fill = width - PyString_GET_SIZE(self);
   3275 
   3276     s = pad(self, fill, 0, '0');
   3277 
   3278     if (s == NULL)
   3279         return NULL;
   3280 
   3281     p = PyString_AS_STRING(s);
   3282     if (p[fill] == '+' || p[fill] == '-') {
   3283         /* move sign to beginning of string */
   3284         p[0] = p[fill];
   3285         p[fill] = '0';
   3286     }
   3287 
   3288     return (PyObject*) s;
   3289 }
   3290 
   3291 PyDoc_STRVAR(isspace__doc__,
   3292 "S.isspace() -> bool\n\
   3293 \n\
   3294 Return True if all characters in S are whitespace\n\
   3295 and there is at least one character in S, False otherwise.");
   3296 
   3297 static PyObject*
   3298 string_isspace(PyStringObject *self)
   3299 {
   3300     register const unsigned char *p
   3301         = (unsigned char *) PyString_AS_STRING(self);
   3302     register const unsigned char *e;
   3303 
   3304     /* Shortcut for single character strings */
   3305     if (PyString_GET_SIZE(self) == 1 &&
   3306         isspace(*p))
   3307         return PyBool_FromLong(1);
   3308 
   3309     /* Special case for empty strings */
   3310     if (PyString_GET_SIZE(self) == 0)
   3311         return PyBool_FromLong(0);
   3312 
   3313     e = p + PyString_GET_SIZE(self);
   3314     for (; p < e; p++) {
   3315         if (!isspace(*p))
   3316             return PyBool_FromLong(0);
   3317     }
   3318     return PyBool_FromLong(1);
   3319 }
   3320 
   3321 
   3322 PyDoc_STRVAR(isalpha__doc__,
   3323 "S.isalpha() -> bool\n\
   3324 \n\
   3325 Return True if all characters in S are alphabetic\n\
   3326 and there is at least one character in S, False otherwise.");
   3327 
   3328 static PyObject*
   3329 string_isalpha(PyStringObject *self)
   3330 {
   3331     register const unsigned char *p
   3332         = (unsigned char *) PyString_AS_STRING(self);
   3333     register const unsigned char *e;
   3334 
   3335     /* Shortcut for single character strings */
   3336     if (PyString_GET_SIZE(self) == 1 &&
   3337         isalpha(*p))
   3338         return PyBool_FromLong(1);
   3339 
   3340     /* Special case for empty strings */
   3341     if (PyString_GET_SIZE(self) == 0)
   3342         return PyBool_FromLong(0);
   3343 
   3344     e = p + PyString_GET_SIZE(self);
   3345     for (; p < e; p++) {
   3346         if (!isalpha(*p))
   3347             return PyBool_FromLong(0);
   3348     }
   3349     return PyBool_FromLong(1);
   3350 }
   3351 
   3352 
   3353 PyDoc_STRVAR(isalnum__doc__,
   3354 "S.isalnum() -> bool\n\
   3355 \n\
   3356 Return True if all characters in S are alphanumeric\n\
   3357 and there is at least one character in S, False otherwise.");
   3358 
   3359 static PyObject*
   3360 string_isalnum(PyStringObject *self)
   3361 {
   3362     register const unsigned char *p
   3363         = (unsigned char *) PyString_AS_STRING(self);
   3364     register const unsigned char *e;
   3365 
   3366     /* Shortcut for single character strings */
   3367     if (PyString_GET_SIZE(self) == 1 &&
   3368         isalnum(*p))
   3369         return PyBool_FromLong(1);
   3370 
   3371     /* Special case for empty strings */
   3372     if (PyString_GET_SIZE(self) == 0)
   3373         return PyBool_FromLong(0);
   3374 
   3375     e = p + PyString_GET_SIZE(self);
   3376     for (; p < e; p++) {
   3377         if (!isalnum(*p))
   3378             return PyBool_FromLong(0);
   3379     }
   3380     return PyBool_FromLong(1);
   3381 }
   3382 
   3383 
   3384 PyDoc_STRVAR(isdigit__doc__,
   3385 "S.isdigit() -> bool\n\
   3386 \n\
   3387 Return True if all characters in S are digits\n\
   3388 and there is at least one character in S, False otherwise.");
   3389 
   3390 static PyObject*
   3391 string_isdigit(PyStringObject *self)
   3392 {
   3393     register const unsigned char *p
   3394         = (unsigned char *) PyString_AS_STRING(self);
   3395     register const unsigned char *e;
   3396 
   3397     /* Shortcut for single character strings */
   3398     if (PyString_GET_SIZE(self) == 1 &&
   3399         isdigit(*p))
   3400         return PyBool_FromLong(1);
   3401 
   3402     /* Special case for empty strings */
   3403     if (PyString_GET_SIZE(self) == 0)
   3404         return PyBool_FromLong(0);
   3405 
   3406     e = p + PyString_GET_SIZE(self);
   3407     for (; p < e; p++) {
   3408         if (!isdigit(*p))
   3409             return PyBool_FromLong(0);
   3410     }
   3411     return PyBool_FromLong(1);
   3412 }
   3413 
   3414 
   3415 PyDoc_STRVAR(islower__doc__,
   3416 "S.islower() -> bool\n\
   3417 \n\
   3418 Return True if all cased characters in S are lowercase and there is\n\
   3419 at least one cased character in S, False otherwise.");
   3420 
   3421 static PyObject*
   3422 string_islower(PyStringObject *self)
   3423 {
   3424     register const unsigned char *p
   3425         = (unsigned char *) PyString_AS_STRING(self);
   3426     register const unsigned char *e;
   3427     int cased;
   3428 
   3429     /* Shortcut for single character strings */
   3430     if (PyString_GET_SIZE(self) == 1)
   3431         return PyBool_FromLong(islower(*p) != 0);
   3432 
   3433     /* Special case for empty strings */
   3434     if (PyString_GET_SIZE(self) == 0)
   3435         return PyBool_FromLong(0);
   3436 
   3437     e = p + PyString_GET_SIZE(self);
   3438     cased = 0;
   3439     for (; p < e; p++) {
   3440         if (isupper(*p))
   3441             return PyBool_FromLong(0);
   3442         else if (!cased && islower(*p))
   3443             cased = 1;
   3444     }
   3445     return PyBool_FromLong(cased);
   3446 }
   3447 
   3448 
   3449 PyDoc_STRVAR(isupper__doc__,
   3450 "S.isupper() -> bool\n\
   3451 \n\
   3452 Return True if all cased characters in S are uppercase and there is\n\
   3453 at least one cased character in S, False otherwise.");
   3454 
   3455 static PyObject*
   3456 string_isupper(PyStringObject *self)
   3457 {
   3458     register const unsigned char *p
   3459         = (unsigned char *) PyString_AS_STRING(self);
   3460     register const unsigned char *e;
   3461     int cased;
   3462 
   3463     /* Shortcut for single character strings */
   3464     if (PyString_GET_SIZE(self) == 1)
   3465         return PyBool_FromLong(isupper(*p) != 0);
   3466 
   3467     /* Special case for empty strings */
   3468     if (PyString_GET_SIZE(self) == 0)
   3469         return PyBool_FromLong(0);
   3470 
   3471     e = p + PyString_GET_SIZE(self);
   3472     cased = 0;
   3473     for (; p < e; p++) {
   3474         if (islower(*p))
   3475             return PyBool_FromLong(0);
   3476         else if (!cased && isupper(*p))
   3477             cased = 1;
   3478     }
   3479     return PyBool_FromLong(cased);
   3480 }
   3481 
   3482 
   3483 PyDoc_STRVAR(istitle__doc__,
   3484 "S.istitle() -> bool\n\
   3485 \n\
   3486 Return True if S is a titlecased string and there is at least one\n\
   3487 character in S, i.e. uppercase characters may only follow uncased\n\
   3488 characters and lowercase characters only cased ones. Return False\n\
   3489 otherwise.");
   3490 
   3491 static PyObject*
   3492 string_istitle(PyStringObject *self, PyObject *uncased)
   3493 {
   3494     register const unsigned char *p
   3495         = (unsigned char *) PyString_AS_STRING(self);
   3496     register const unsigned char *e;
   3497     int cased, previous_is_cased;
   3498 
   3499     /* Shortcut for single character strings */
   3500     if (PyString_GET_SIZE(self) == 1)
   3501         return PyBool_FromLong(isupper(*p) != 0);
   3502 
   3503     /* Special case for empty strings */
   3504     if (PyString_GET_SIZE(self) == 0)
   3505         return PyBool_FromLong(0);
   3506 
   3507     e = p + PyString_GET_SIZE(self);
   3508     cased = 0;
   3509     previous_is_cased = 0;
   3510     for (; p < e; p++) {
   3511         register const unsigned char ch = *p;
   3512 
   3513         if (isupper(ch)) {
   3514             if (previous_is_cased)
   3515                 return PyBool_FromLong(0);
   3516             previous_is_cased = 1;
   3517             cased = 1;
   3518         }
   3519         else if (islower(ch)) {
   3520             if (!previous_is_cased)
   3521                 return PyBool_FromLong(0);
   3522             previous_is_cased = 1;
   3523             cased = 1;
   3524         }
   3525         else
   3526             previous_is_cased = 0;
   3527     }
   3528     return PyBool_FromLong(cased);
   3529 }
   3530 
   3531 
   3532 PyDoc_STRVAR(splitlines__doc__,
   3533 "S.splitlines([keepends]) -> list of strings\n\
   3534 \n\
   3535 Return a list of the lines in S, breaking at line boundaries.\n\
   3536 Line breaks are not included in the resulting list unless keepends\n\
   3537 is given and true.");
   3538 
   3539 static PyObject*
   3540 string_splitlines(PyStringObject *self, PyObject *args)
   3541 {
   3542     int keepends = 0;
   3543 
   3544     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
   3545         return NULL;
   3546 
   3547     return stringlib_splitlines(
   3548         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
   3549         keepends
   3550     );
   3551 }
   3552 
   3553 PyDoc_STRVAR(sizeof__doc__,
   3554 "S.__sizeof__() -> size of S in memory, in bytes");
   3555 
   3556 static PyObject *
   3557 string_sizeof(PyStringObject *v)
   3558 {
   3559     Py_ssize_t res;
   3560     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
   3561     return PyInt_FromSsize_t(res);
   3562 }
   3563 
   3564 static PyObject *
   3565 string_getnewargs(PyStringObject *v)
   3566 {
   3567     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
   3568 }
   3569 
   3570 
   3571 #include "stringlib/string_format.h"
   3572 
/* Docstring attached to the "format" entry of string_methods, whose C
   implementation is do_string_format. */
PyDoc_STRVAR(format__doc__,
"S.format(*args, **kwargs) -> string\n\
\n\
Return a formatted version of S, using substitutions from args and kwargs.\n\
The substitutions are identified by braces ('{' and '}').");
   3578 
   3579 static PyObject *
   3580 string__format__(PyObject* self, PyObject* args)
   3581 {
   3582     PyObject *format_spec;
   3583     PyObject *result = NULL;
   3584     PyObject *tmp = NULL;
   3585 
   3586     /* If 2.x, convert format_spec to the same type as value */
   3587     /* This is to allow things like u''.format('') */
   3588     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
   3589         goto done;
   3590     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
   3591         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
   3592                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
   3593         goto done;
   3594     }
   3595     tmp = PyObject_Str(format_spec);
   3596     if (tmp == NULL)
   3597         goto done;
   3598     format_spec = tmp;
   3599 
   3600     result = _PyBytes_FormatAdvanced(self,
   3601                                      PyString_AS_STRING(format_spec),
   3602                                      PyString_GET_SIZE(format_spec));
   3603 done:
   3604     Py_XDECREF(tmp);
   3605     return result;
   3606 }
   3607 
   3608 PyDoc_STRVAR(p_format__doc__,
   3609 "S.__format__(format_spec) -> string\n\
   3610 \n\
   3611 Return a formatted version of S as described by format_spec.");
   3612 
   3613 
/* Method table of the str type; PyString_Type refers to it through its
   tp_methods slot.  Each entry lists the Python-visible name, the C
   implementation, the calling-convention flags and, where one is given,
   the docstring.  The table ends with an all-NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* New-style formatting support; the two _formatter_* entries carry no
       docstring. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL,     NULL}                         /* sentinel */
};
   3672 
   3673 static PyObject *
   3674 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
   3675 
   3676 static PyObject *
   3677 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   3678 {
   3679     PyObject *x = NULL;
   3680     static char *kwlist[] = {"object", 0};
   3681 
   3682     if (type != &PyString_Type)
   3683         return str_subtype_new(type, args, kwds);
   3684     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
   3685         return NULL;
   3686     if (x == NULL)
   3687         return PyString_FromString("");
   3688     return PyObject_Str(x);
   3689 }
   3690 
   3691 static PyObject *
   3692 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   3693 {
   3694     PyObject *tmp, *pnew;
   3695     Py_ssize_t n;
   3696 
   3697     assert(PyType_IsSubtype(type, &PyString_Type));
   3698     tmp = string_new(&PyString_Type, args, kwds);
   3699     if (tmp == NULL)
   3700         return NULL;
   3701     assert(PyString_CheckExact(tmp));
   3702     n = PyString_GET_SIZE(tmp);
   3703     pnew = type->tp_alloc(type, n);
   3704     if (pnew != NULL) {
   3705         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
   3706         ((PyStringObject *)pnew)->ob_shash =
   3707             ((PyStringObject *)tmp)->ob_shash;
   3708         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
   3709     }
   3710     Py_DECREF(tmp);
   3711     return pnew;
   3712 }
   3713 
   3714 static PyObject *
   3715 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   3716 {
   3717     PyErr_SetString(PyExc_TypeError,
   3718                     "The basestring type cannot be instantiated");
   3719     return NULL;
   3720 }
   3721 
   3722 static PyObject *
   3723 string_mod(PyObject *v, PyObject *w)
   3724 {
   3725     if (!PyString_Check(v)) {
   3726         Py_INCREF(Py_NotImplemented);
   3727         return Py_NotImplemented;
   3728     }
   3729     return PyString_Format(v, w);
   3730 }
   3731 
/* Docstring installed in the tp_doc slot of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
   3734 
/* Number-protocol slots of the str type (referenced from tp_as_number of
   PyString_Type).  Only nb_remainder is populated, with string_mod; the
   slots listed before it are zero and the ones after it are left to the
   implicit zero-initialization of the remaining members. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
   3742 
   3743 
/* Type object for basestring.  Apart from its name, flags, docstring and
   base type, the only slot filled in is tp_new, which points at
   basestring_new and therefore always raises TypeError.  PyString_Type
   names this object as its tp_base. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
   3785 
/* Docstring installed in the tp_doc slot of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It is a variable-size type (basic size
   PyStringObject_SIZE, one char per item) derived from PyBaseString_Type.
   Instances are created by string_new, released through string_dealloc /
   PyObject_Del, and expose the methods listed in string_methods. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
   3835 
   3836 void
   3837 PyString_Concat(register PyObject **pv, register PyObject *w)
   3838 {
   3839     register PyObject *v;
   3840     if (*pv == NULL)
   3841         return;
   3842     if (w == NULL || !PyString_Check(*pv)) {
   3843         Py_DECREF(*pv);
   3844         *pv = NULL;
   3845         return;
   3846     }
   3847     v = string_concat((PyStringObject *) *pv, w);
   3848     Py_DECREF(*pv);
   3849     *pv = v;
   3850 }
   3851 
/* Same as PyString_Concat(pv, w), followed by releasing one reference to
   w.  w may be NULL, in which case only the PyString_Concat() call has an
   effect. */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyString_Concat(pv, w);
    Py_XDECREF(w);
}
   3858 
   3859 
   3860 /* The following function breaks the notion that strings are immutable:
   3861    it changes the size of a string.  We get away with this only if there
   3862    is only one module referencing the object.  You can also think of it
   3863    as creating a new string object and destroying the old one, only
   3864    more efficiently.  In any case, don't use this if the string may
   3865    already be known to some other part of the code...
   3866    Note that if there's not enough memory to resize the string, the original
   3867    string object at *pv is deallocated, *pv is set to NULL, an "out of
   3868    memory" exception is set, and -1 is returned.  Else (on success) 0 is
   3869    returned, and the value in *pv may or may not be the same as on input.
   3870    As always, an extra byte is allocated for a trailing \0 byte (newsize
   3871    does *not* include that), and a trailing \0 byte is stored.
   3872 */
   3873 
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Only a string with exactly one reference that is not interned may
       be resized, and only to a non-negative size.  Anything else is
       reported as a bad internal call; *pv is cleared and the caller's
       reference is dropped. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
        PyString_CHECK_INTERNED(v)) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Undo the reference bookkeeping for v here; it is redone for the
       reallocated block by _Py_NewReference() below. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    /* PyStringObject_SIZE already accounts for the trailing \0 byte. */
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the old block through v and report
           the out-of-memory condition; *pv stays NULL. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
   3904 
   3905 /* Helpers for formatstring */
   3906 
   3907 Py_LOCAL_INLINE(PyObject *)
   3908 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
   3909 {
   3910     Py_ssize_t argidx = *p_argidx;
   3911     if (argidx < arglen) {
   3912         (*p_argidx)++;
   3913         if (arglen < 0)
   3914             return args;
   3915         else
   3916             return PyTuple_GetItem(args, argidx);
   3917     }
   3918     PyErr_SetString(PyExc_TypeError,
   3919                     "not enough arguments for format string");
   3920     return NULL;
   3921 }
   3922 
/* Format codes
 *
 * Each flag occupies its own bit so that several of them can be OR-ed
 * into the `flags` bitmask taken by the formatting helpers below.  The
 * character associated with each flag:
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
   3935 
   3936 /* Returns a new reference to a PyString object, or NULL on failure. */
   3937 
   3938 static PyObject *
   3939 formatfloat(PyObject *v, int flags, int prec, int type)
   3940 {
   3941     char *p;
   3942     PyObject *result;
   3943     double x;
   3944 
   3945     x = PyFloat_AsDouble(v);
   3946     if (x == -1.0 && PyErr_Occurred()) {
   3947         PyErr_Format(PyExc_TypeError, "float argument required, "
   3948                      "not %.200s", Py_TYPE(v)->tp_name);
   3949         return NULL;
   3950     }
   3951 
   3952     if (prec < 0)
   3953         prec = 6;
   3954 
   3955     p = PyOS_double_to_string(x, type, prec,
   3956                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
   3957 
   3958     if (p == NULL)
   3959         return NULL;
   3960     result = PyString_FromStringAndSize(p, strlen(p));
   3961     PyMem_Free(p);
   3962     return result;
   3963 }
   3964 
   3965 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
   3966  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
   3967  * Python's regular ints.
   3968  * Return value:  a new PyString*, or NULL if error.
   3969  *  .  *pbuf is set to point into it,
   3970  *     *plen set to the # of chars following that.
   3971  *     Caller must decref it when done using pbuf.
   3972  *     The string starting at *pbuf is of the form
   3973  *         "-"? ("0x" | "0X")? digit+
   3974  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
   3975  *         set in flags.  The case of hex digits will be correct,
   3976  *     There will be at least prec digits, zero-filled on the left if
   3977  *         necessary to get that many.
   3978  * val          object to be converted
   3979  * flags        bitmask of format flags; only F_ALT is looked at
   3980  * prec         minimum number of digits; 0-fill on left if needed
   3981  * type         a character in [duoxX]; u acts the same as d
   3982  *
   3983  * CAUTION:  o, x and X conversions on regular ints can never
   3984  * produce a '-' sign, but can for Python's unbounded ints.
   3985  */
   3986 PyObject*
   3987 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
   3988                      char **pbuf, int *plen)
   3989 {
   3990     PyObject *result = NULL;
   3991     char *buf;
   3992     Py_ssize_t i;
   3993     int sign;           /* 1 if '-', else 0 */
   3994     int len;            /* number of characters */
   3995     Py_ssize_t llen;
   3996     int numdigits;      /* len == numnondigits + numdigits */
   3997     int numnondigits = 0;
   3998 
   3999     switch (type) {
   4000     case 'd':
   4001     case 'u':
   4002         result = Py_TYPE(val)->tp_str(val);
   4003         break;
   4004     case 'o':
   4005         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
   4006         break;
   4007     case 'x':
   4008     case 'X':
   4009         numnondigits = 2;
   4010         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
   4011         break;
   4012     default:
   4013         assert(!"'type' not in [duoxX]");
   4014     }
   4015     if (!result)
   4016         return NULL;
   4017 
   4018     buf = PyString_AsString(result);
   4019     if (!buf) {
   4020         Py_DECREF(result);
   4021         return NULL;
   4022     }
   4023 
   4024     /* To modify the string in-place, there can only be one reference. */
   4025     if (Py_REFCNT(result) != 1) {
   4026         PyErr_BadInternalCall();
   4027         return NULL;
   4028     }
   4029     llen = PyString_Size(result);
   4030     if (llen > INT_MAX) {
   4031         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
   4032         return NULL;
   4033     }
   4034     len = (int)llen;
   4035     if (buf[len-1] == 'L') {
   4036         --len;
   4037         buf[len] = '\0';
   4038     }
   4039     sign = buf[0] == '-';
   4040     numnondigits += sign;
   4041     numdigits = len - numnondigits;
   4042     assert(numdigits > 0);
   4043 
   4044     /* Get rid of base marker unless F_ALT */
   4045     if ((flags & F_ALT) == 0) {
   4046         /* Need to skip 0x, 0X or 0. */
   4047         int skipped = 0;
   4048         switch (type) {
   4049         case 'o':
   4050             assert(buf[sign] == '0');
   4051             /* If 0 is only digit, leave it alone. */
   4052             if (numdigits > 1) {
   4053                 skipped = 1;
   4054                 --numdigits;
   4055             }
   4056             break;
   4057         case 'x':
   4058         case 'X':
   4059             assert(buf[sign] == '0');
   4060             assert(buf[sign + 1] == 'x');
   4061             skipped = 2;
   4062             numnondigits -= 2;
   4063             break;
   4064         }
   4065         if (skipped) {
   4066             buf += skipped;
   4067             len -= skipped;
   4068             if (sign)
   4069                 buf[0] = '-';
   4070         }
   4071         assert(len == numnondigits + numdigits);
   4072         assert(numdigits > 0);
   4073     }
   4074 
   4075     /* Fill with leading zeroes to meet minimum width. */
   4076     if (prec > numdigits) {
   4077         PyObject *r1 = PyString_FromStringAndSize(NULL,
   4078                                 numnondigits + prec);
   4079         char *b1;
   4080         if (!r1) {
   4081             Py_DECREF(result);
   4082             return NULL;
   4083         }
   4084         b1 = PyString_AS_STRING(r1);
   4085         for (i = 0; i < numnondigits; ++i)
   4086             *b1++ = *buf++;
   4087         for (i = 0; i < prec - numdigits; i++)
   4088             *b1++ = '0';
   4089         for (i = 0; i < numdigits; i++)
   4090             *b1++ = *buf++;
   4091         *b1 = '\0';
   4092         Py_DECREF(result);
   4093         result = r1;
   4094         buf = PyString_AS_STRING(result);
   4095         len = numnondigits + prec;
   4096     }
   4097 
   4098     /* Fix up case for hex conversions. */
   4099     if (type == 'X') {
   4100         /* Need to convert all lower case letters to upper case.
   4101            and need to convert 0x to 0X (and -0x to -0X). */
   4102         for (i = 0; i < len; i++)
   4103             if (buf[i] >= 'a' && buf[i] <= 'x')
   4104                 buf[i] -= 'a'-'A';
   4105     }
   4106     *pbuf = buf;
   4107     *plen = len;
   4108     return result;
   4109 }
   4110 
   4111 Py_LOCAL_INLINE(int)
   4112 formatint(char *buf, size_t buflen, int flags,
   4113           int prec, int type, PyObject *v)
   4114 {
   4115     /* fmt = '%#.' + `prec` + 'l' + `type`
   4116        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
   4117        + 1 + 1 = 24 */
   4118     char fmt[64];       /* plenty big enough! */
   4119     char *sign;
   4120     long x;
   4121 
   4122     x = PyInt_AsLong(v);
   4123     if (x == -1 && PyErr_Occurred()) {
   4124         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
   4125                      Py_TYPE(v)->tp_name);
   4126         return -1;
   4127     }
   4128     if (x < 0 && type == 'u') {
   4129         type = 'd';
   4130     }
   4131     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
   4132         sign = "-";
   4133     else
   4134         sign = "";
   4135     if (prec < 0)
   4136         prec = 1;
   4137 
   4138     if ((flags & F_ALT) &&
   4139         (type == 'x' || type == 'X')) {
   4140         /* When converting under %#x or %#X, there are a number
   4141          * of issues that cause pain:
   4142          * - when 0 is being converted, the C standard leaves off
   4143          *   the '0x' or '0X', which is inconsistent with other
   4144          *   %#x/%#X conversions and inconsistent with Python's
   4145          *   hex() function
   4146          * - there are platforms that violate the standard and
   4147          *   convert 0 with the '0x' or '0X'
   4148          *   (Metrowerks, Compaq Tru64)
   4149          * - there are platforms that give '0x' when converting
   4150          *   under %#X, but convert 0 in accordance with the
   4151          *   standard (OS/2 EMX)
   4152          *
   4153          * We can achieve the desired consistency by inserting our
   4154          * own '0x' or '0X' prefix, and substituting %x/%X in place
   4155          * of %#x/%#X.
   4156          *
   4157          * Note that this is the same approach as used in
   4158          * formatint() in unicodeobject.c
   4159          */
   4160         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
   4161                       sign, type, prec, type);
   4162     }
   4163     else {
   4164         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
   4165                       sign, (flags&F_ALT) ? "#" : "",
   4166                       prec, type);
   4167     }
   4168 
   4169     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
   4170      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
   4171      */
   4172     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
   4173         PyErr_SetString(PyExc_OverflowError,
   4174             "formatted integer is too long (precision too large?)");
   4175         return -1;
   4176     }
   4177     if (sign[0])
   4178         PyOS_snprintf(buf, buflen, fmt, -x);
   4179     else
   4180         PyOS_snprintf(buf, buflen, fmt, x);
   4181     return (int)strlen(buf);
   4182 }
   4183 
   4184 Py_LOCAL_INLINE(int)
   4185 formatchar(char *buf, size_t buflen, PyObject *v)
   4186 {
   4187     /* presume that the buffer is at least 2 characters long */
   4188     if (PyString_Check(v)) {
   4189         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
   4190             return -1;
   4191     }
   4192     else {
   4193         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
   4194             return -1;
   4195     }
   4196     buf[1] = '\0';
   4197     return 1;
   4198 }
   4199 
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted (the `formatbuf` array declared per format
   specifier in PyString_Format). XXX This is a magic number. Each
   formatting routine does bounds checking to ensure no overflow, but a
   better solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
   4209 
   4210 PyObject *
   4211 PyString_Format(PyObject *format, PyObject *args)
   4212 {
   4213     char *fmt, *res;
   4214     Py_ssize_t arglen, argidx;
   4215     Py_ssize_t reslen, rescnt, fmtcnt;
   4216     int args_owned = 0;
   4217     PyObject *result, *orig_args;
   4218 #ifdef Py_USING_UNICODE
   4219     PyObject *v, *w;
   4220 #endif
   4221     PyObject *dict = NULL;
   4222     if (format == NULL || !PyString_Check(format) || args == NULL) {
   4223         PyErr_BadInternalCall();
   4224         return NULL;
   4225     }
   4226     orig_args = args;
   4227     fmt = PyString_AS_STRING(format);
   4228     fmtcnt = PyString_GET_SIZE(format);
   4229     reslen = rescnt = fmtcnt + 100;
   4230     result = PyString_FromStringAndSize((char *)NULL, reslen);
   4231     if (result == NULL)
   4232         return NULL;
   4233     res = PyString_AsString(result);
   4234     if (PyTuple_Check(args)) {
   4235         arglen = PyTuple_GET_SIZE(args);
   4236         argidx = 0;
   4237     }
   4238     else {
   4239         arglen = -1;
   4240         argidx = -2;
   4241     }
   4242     if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
   4243         !PyObject_TypeCheck(args, &PyBaseString_Type))
   4244         dict = args;
   4245     while (--fmtcnt >= 0) {
   4246         if (*fmt != '%') {
   4247             if (--rescnt < 0) {
   4248                 rescnt = fmtcnt + 100;
   4249                 reslen += rescnt;
   4250                 if (_PyString_Resize(&result, reslen))
   4251                     return NULL;
   4252                 res = PyString_AS_STRING(result)
   4253                     + reslen - rescnt;
   4254                 --rescnt;
   4255             }
   4256             *res++ = *fmt++;
   4257         }
   4258         else {
   4259             /* Got a format specifier */
   4260             int flags = 0;
   4261             Py_ssize_t width = -1;
   4262             int prec = -1;
   4263             int c = '\0';
   4264             int fill;
   4265             int isnumok;
   4266             PyObject *v     = NULL;
   4267             PyObject *temp  = NULL;
   4268             char *pbuf      = NULL;
   4269             int sign;
   4270             Py_ssize_t len;
   4271             char formatbuf[FORMATBUFLEN];
   4272                  /* For format{int,char}() */
   4273 #ifdef Py_USING_UNICODE
   4274             char *fmt_start = fmt;
   4275             Py_ssize_t argidx_start = argidx;
   4276 #endif
   4277 
   4278             fmt++;
   4279             if (*fmt == '(') {
   4280                 char *keystart;
   4281                 Py_ssize_t keylen;
   4282                 PyObject *key;
   4283                 int pcount = 1;
   4284 
   4285                 if (dict == NULL) {
   4286                     PyErr_SetString(PyExc_TypeError,
   4287                              "format requires a mapping");
   4288                     goto error;
   4289                 }
   4290                 ++fmt;
   4291                 --fmtcnt;
   4292                 keystart = fmt;
   4293                 /* Skip over balanced parentheses */
   4294                 while (pcount > 0 && --fmtcnt >= 0) {
   4295                     if (*fmt == ')')
   4296                         --pcount;
   4297                     else if (*fmt == '(')
   4298                         ++pcount;
   4299                     fmt++;
   4300                 }
   4301                 keylen = fmt - keystart - 1;
   4302                 if (fmtcnt < 0 || pcount > 0) {
   4303                     PyErr_SetString(PyExc_ValueError,
   4304                                "incomplete format key");
   4305                     goto error;
   4306                 }
   4307                 key = PyString_FromStringAndSize(keystart,
   4308                                                  keylen);
   4309                 if (key == NULL)
   4310                     goto error;
   4311                 if (args_owned) {
   4312                     Py_DECREF(args);
   4313                     args_owned = 0;
   4314                 }
   4315                 args = PyObject_GetItem(dict, key);
   4316                 Py_DECREF(key);
   4317                 if (args == NULL) {
   4318                     goto error;
   4319                 }
   4320                 args_owned = 1;
   4321                 arglen = -1;
   4322                 argidx = -2;
   4323             }
   4324             while (--fmtcnt >= 0) {
   4325                 switch (c = *fmt++) {
   4326                 case '-': flags |= F_LJUST; continue;
   4327                 case '+': flags |= F_SIGN; continue;
   4328                 case ' ': flags |= F_BLANK; continue;
   4329                 case '#': flags |= F_ALT; continue;
   4330                 case '0': flags |= F_ZERO; continue;
   4331                 }
   4332                 break;
   4333             }
   4334             if (c == '*') {
   4335                 v = getnextarg(args, arglen, &argidx);
   4336                 if (v == NULL)
   4337                     goto error;
   4338                 if (!PyInt_Check(v)) {
   4339                     PyErr_SetString(PyExc_TypeError,
   4340                                     "* wants int");
   4341                     goto error;
   4342                 }
   4343                 width = PyInt_AsLong(v);
   4344                 if (width < 0) {
   4345                     flags |= F_LJUST;
   4346                     width = -width;
   4347                 }
   4348                 if (--fmtcnt >= 0)
   4349                     c = *fmt++;
   4350             }
   4351             else if (c >= 0 && isdigit(c)) {
   4352                 width = c - '0';
   4353                 while (--fmtcnt >= 0) {
   4354                     c = Py_CHARMASK(*fmt++);
   4355                     if (!isdigit(c))
   4356                         break;
   4357                     if ((width*10) / 10 != width) {
   4358                         PyErr_SetString(
   4359                             PyExc_ValueError,
   4360                             "width too big");
   4361                         goto error;
   4362                     }
   4363                     width = width*10 + (c - '0');
   4364                 }
   4365             }
   4366             if (c == '.') {
   4367                 prec = 0;
   4368                 if (--fmtcnt >= 0)
   4369                     c = *fmt++;
   4370                 if (c == '*') {
   4371                     v = getnextarg(args, arglen, &argidx);
   4372                     if (v == NULL)
   4373                         goto error;
   4374                     if (!PyInt_Check(v)) {
   4375                         PyErr_SetString(
   4376                             PyExc_TypeError,
   4377                             "* wants int");
   4378                         goto error;
   4379                     }
   4380                     prec = PyInt_AsLong(v);
   4381                     if (prec < 0)
   4382                         prec = 0;
   4383                     if (--fmtcnt >= 0)
   4384                         c = *fmt++;
   4385                 }
   4386                 else if (c >= 0 && isdigit(c)) {
   4387                     prec = c - '0';
   4388                     while (--fmtcnt >= 0) {
   4389                         c = Py_CHARMASK(*fmt++);
   4390                         if (!isdigit(c))
   4391                             break;
   4392                         if ((prec*10) / 10 != prec) {
   4393                             PyErr_SetString(
   4394                                 PyExc_ValueError,
   4395                                 "prec too big");
   4396                             goto error;
   4397                         }
   4398                         prec = prec*10 + (c - '0');
   4399                     }
   4400                 }
   4401             } /* prec */
   4402             if (fmtcnt >= 0) {
   4403                 if (c == 'h' || c == 'l' || c == 'L') {
   4404                     if (--fmtcnt >= 0)
   4405                         c = *fmt++;
   4406                 }
   4407             }
   4408             if (fmtcnt < 0) {
   4409                 PyErr_SetString(PyExc_ValueError,
   4410                                 "incomplete format");
   4411                 goto error;
   4412             }
   4413             if (c != '%') {
   4414                 v = getnextarg(args, arglen, &argidx);
   4415                 if (v == NULL)
   4416                     goto error;
   4417             }
   4418             sign = 0;
   4419             fill = ' ';
   4420             switch (c) {
   4421             case '%':
   4422                 pbuf = "%";
   4423                 len = 1;
   4424                 break;
   4425             case 's':
   4426 #ifdef Py_USING_UNICODE
   4427                 if (PyUnicode_Check(v)) {
   4428                     fmt = fmt_start;
   4429                     argidx = argidx_start;
   4430                     goto unicode;
   4431                 }
   4432 #endif
   4433                 temp = _PyObject_Str(v);
   4434 #ifdef Py_USING_UNICODE
   4435                 if (temp != NULL && PyUnicode_Check(temp)) {
   4436                     Py_DECREF(temp);
   4437                     fmt = fmt_start;
   4438                     argidx = argidx_start;
   4439                     goto unicode;
   4440                 }
   4441 #endif
   4442                 /* Fall through */
   4443             case 'r':
   4444                 if (c == 'r')
   4445                     temp = PyObject_Repr(v);
   4446                 if (temp == NULL)
   4447                     goto error;
   4448                 if (!PyString_Check(temp)) {
   4449                     PyErr_SetString(PyExc_TypeError,
   4450                       "%s argument has non-string str()");
   4451                     Py_DECREF(temp);
   4452                     goto error;
   4453                 }
   4454                 pbuf = PyString_AS_STRING(temp);
   4455                 len = PyString_GET_SIZE(temp);
   4456                 if (prec >= 0 && len > prec)
   4457                     len = prec;
   4458                 break;
   4459             case 'i':
   4460             case 'd':
   4461             case 'u':
   4462             case 'o':
   4463             case 'x':
   4464             case 'X':
   4465                 if (c == 'i')
   4466                     c = 'd';
   4467                 isnumok = 0;
   4468                 if (PyNumber_Check(v)) {
   4469                     PyObject *iobj=NULL;
   4470 
   4471                     if (PyInt_Check(v) || (PyLong_Check(v))) {
   4472                         iobj = v;
   4473                         Py_INCREF(iobj);
   4474                     }
   4475                     else {
   4476                         iobj = PyNumber_Int(v);
   4477                         if (iobj==NULL) iobj = PyNumber_Long(v);
   4478                     }
   4479                     if (iobj!=NULL) {
   4480                         if (PyInt_Check(iobj)) {
   4481                             isnumok = 1;
   4482                             pbuf = formatbuf;
   4483                             len = formatint(pbuf,
   4484                                             sizeof(formatbuf),
   4485                                             flags, prec, c, iobj);
   4486                             Py_DECREF(iobj);
   4487                             if (len < 0)
   4488                                 goto error;
   4489                             sign = 1;
   4490                         }
   4491                         else if (PyLong_Check(iobj)) {
   4492                             int ilen;
   4493 
   4494                             isnumok = 1;
   4495                             temp = _PyString_FormatLong(iobj, flags,
   4496                                 prec, c, &pbuf, &ilen);
   4497                             Py_DECREF(iobj);
   4498                             len = ilen;
   4499                             if (!temp)
   4500                                 goto error;
   4501                             sign = 1;
   4502                         }
   4503                         else {
   4504                             Py_DECREF(iobj);
   4505                         }
   4506                     }
   4507                 }
   4508                 if (!isnumok) {
   4509                     PyErr_Format(PyExc_TypeError,
   4510                         "%%%c format: a number is required, "
   4511                         "not %.200s", c, Py_TYPE(v)->tp_name);
   4512                     goto error;
   4513                 }
   4514                 if (flags & F_ZERO)
   4515                     fill = '0';
   4516                 break;
   4517             case 'e':
   4518             case 'E':
   4519             case 'f':
   4520             case 'F':
   4521             case 'g':
   4522             case 'G':
   4523                 temp = formatfloat(v, flags, prec, c);
   4524                 if (temp == NULL)
   4525                     goto error;
   4526                 pbuf = PyString_AS_STRING(temp);
   4527                 len = PyString_GET_SIZE(temp);
   4528                 sign = 1;
   4529                 if (flags & F_ZERO)
   4530                     fill = '0';
   4531                 break;
   4532             case 'c':
   4533 #ifdef Py_USING_UNICODE
   4534                 if (PyUnicode_Check(v)) {
   4535                     fmt = fmt_start;
   4536                     argidx = argidx_start;
   4537                     goto unicode;
   4538                 }
   4539 #endif
   4540                 pbuf = formatbuf;
   4541                 len = formatchar(pbuf, sizeof(formatbuf), v);
   4542                 if (len < 0)
   4543                     goto error;
   4544                 break;
   4545             default:
   4546                 PyErr_Format(PyExc_ValueError,
   4547                   "unsupported format character '%c' (0x%x) "
   4548                   "at index %zd",
   4549                   c, c,
   4550                   (Py_ssize_t)(fmt - 1 -
   4551                                PyString_AsString(format)));
   4552                 goto error;
   4553             }
   4554             if (sign) {
   4555                 if (*pbuf == '-' || *pbuf == '+') {
   4556                     sign = *pbuf++;
   4557                     len--;
   4558                 }
   4559                 else if (flags & F_SIGN)
   4560                     sign = '+';
   4561                 else if (flags & F_BLANK)
   4562                     sign = ' ';
   4563                 else
   4564                     sign = 0;
   4565             }
   4566             if (width < len)
   4567                 width = len;
   4568             if (rescnt - (sign != 0) < width) {
   4569                 reslen -= rescnt;
   4570                 rescnt = width + fmtcnt + 100;
   4571                 reslen += rescnt;
   4572                 if (reslen < 0) {
   4573                     Py_DECREF(result);
   4574                     Py_XDECREF(temp);
   4575                     return PyErr_NoMemory();
   4576                 }
   4577                 if (_PyString_Resize(&result, reslen)) {
   4578                     Py_XDECREF(temp);
   4579                     return NULL;
   4580                 }
   4581                 res = PyString_AS_STRING(result)
   4582                     + reslen - rescnt;
   4583             }
   4584             if (sign) {
   4585                 if (fill != ' ')
   4586                     *res++ = sign;
   4587                 rescnt--;
   4588                 if (width > len)
   4589                     width--;
   4590             }
   4591             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
   4592                 assert(pbuf[0] == '0');
   4593                 assert(pbuf[1] == c);
   4594                 if (fill != ' ') {
   4595                     *res++ = *pbuf++;
   4596                     *res++ = *pbuf++;
   4597                 }
   4598                 rescnt -= 2;
   4599                 width -= 2;
   4600                 if (width < 0)
   4601                     width = 0;
   4602                 len -= 2;
   4603             }
   4604             if (width > len && !(flags & F_LJUST)) {
   4605                 do {
   4606                     --rescnt;
   4607                     *res++ = fill;
   4608                 } while (--width > len);
   4609             }
   4610             if (fill == ' ') {
   4611                 if (sign)
   4612                     *res++ = sign;
   4613                 if ((flags & F_ALT) &&
   4614                     (c == 'x' || c == 'X')) {
   4615                     assert(pbuf[0] == '0');
   4616                     assert(pbuf[1] == c);
   4617                     *res++ = *pbuf++;
   4618                     *res++ = *pbuf++;
   4619                 }
   4620             }
   4621             Py_MEMCPY(res, pbuf, len);
   4622             res += len;
   4623             rescnt -= len;
   4624             while (--width >= len) {
   4625                 --rescnt;
   4626                 *res++ = ' ';
   4627             }
   4628             if (dict && (argidx < arglen) && c != '%') {
   4629                 PyErr_SetString(PyExc_TypeError,
   4630                            "not all arguments converted during string formatting");
   4631                 Py_XDECREF(temp);
   4632                 goto error;
   4633             }
   4634             Py_XDECREF(temp);
   4635         } /* '%' */
   4636     } /* until end */
   4637     if (argidx < arglen && !dict) {
   4638         PyErr_SetString(PyExc_TypeError,
   4639                         "not all arguments converted during string formatting");
   4640         goto error;
   4641     }
   4642     if (args_owned) {
   4643         Py_DECREF(args);
   4644     }
   4645     if (_PyString_Resize(&result, reslen - rescnt))
   4646         return NULL;
   4647     return result;
   4648 
   4649 #ifdef Py_USING_UNICODE
   4650  unicode:
   4651     if (args_owned) {
   4652         Py_DECREF(args);
   4653         args_owned = 0;
   4654     }
   4655     /* Fiddle args right (remove the first argidx arguments) */
   4656     if (PyTuple_Check(orig_args) && argidx > 0) {
   4657         PyObject *v;
   4658         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
   4659         v = PyTuple_New(n);
   4660         if (v == NULL)
   4661             goto error;
   4662         while (--n >= 0) {
   4663             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
   4664             Py_INCREF(w);
   4665             PyTuple_SET_ITEM(v, n, w);
   4666         }
   4667         args = v;
   4668     } else {
   4669         Py_INCREF(orig_args);
   4670         args = orig_args;
   4671     }
   4672     args_owned = 1;
   4673     /* Take what we have of the result and let the Unicode formatting
   4674        function format the rest of the input. */
   4675     rescnt = res - PyString_AS_STRING(result);
   4676     if (_PyString_Resize(&result, rescnt))
   4677         goto error;
   4678     fmtcnt = PyString_GET_SIZE(format) - \
   4679              (fmt - PyString_AS_STRING(format));
   4680     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
   4681     if (format == NULL)
   4682         goto error;
   4683     v = PyUnicode_Format(format, args);
   4684     Py_DECREF(format);
   4685     if (v == NULL)
   4686         goto error;
   4687     /* Paste what we have (result) to what the Unicode formatting
   4688        function returned (v) and return the result (or error) */
   4689     w = PyUnicode_Concat(result, v);
   4690     Py_DECREF(result);
   4691     Py_DECREF(v);
   4692     Py_DECREF(args);
   4693     return w;
   4694 #endif /* Py_USING_UNICODE */
   4695 
   4696  error:
   4697     Py_DECREF(result);
   4698     if (args_owned) {
   4699         Py_DECREF(args);
   4700     }
   4701     return NULL;
   4702 }
   4703 
   4704 void
   4705 PyString_InternInPlace(PyObject **p)
   4706 {
   4707     register PyStringObject *s = (PyStringObject *)(*p);
   4708     PyObject *t;
   4709     if (s == NULL || !PyString_Check(s))
   4710         Py_FatalError("PyString_InternInPlace: strings only please!");
   4711     /* If it's a string subclass, we don't really know what putting
   4712        it in the interned dict might do. */
   4713     if (!PyString_CheckExact(s))
   4714         return;
   4715     if (PyString_CHECK_INTERNED(s))
   4716         return;
   4717     if (interned == NULL) {
   4718         interned = PyDict_New();
   4719         if (interned == NULL) {
   4720             PyErr_Clear(); /* Don't leave an exception */
   4721             return;
   4722         }
   4723     }
   4724     t = PyDict_GetItem(interned, (PyObject *)s);
   4725     if (t) {
   4726         Py_INCREF(t);
   4727         Py_DECREF(*p);
   4728         *p = t;
   4729         return;
   4730     }
   4731 
   4732     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
   4733         PyErr_Clear();
   4734         return;
   4735     }
   4736     /* The two references in interned are not counted by refcnt.
   4737        The string deallocator will take care of this */
   4738     Py_REFCNT(s) -= 2;
   4739     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
   4740 }
   4741 
   4742 void
   4743 PyString_InternImmortal(PyObject **p)
   4744 {
   4745     PyString_InternInPlace(p);
   4746     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
   4747         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
   4748         Py_INCREF(*p);
   4749     }
   4750 }
   4751 
   4752 
   4753 PyObject *
   4754 PyString_InternFromString(const char *cp)
   4755 {
   4756     PyObject *s = PyString_FromString(cp);
   4757     if (s == NULL)
   4758         return NULL;
   4759     PyString_InternInPlace(&s);
   4760     return s;
   4761 }
   4762 
   4763 void
   4764 PyString_Fini(void)
   4765 {
   4766     int i;
   4767     for (i = 0; i < UCHAR_MAX + 1; i++) {
   4768         Py_XDECREF(characters[i]);
   4769         characters[i] = NULL;
   4770     }
   4771     Py_XDECREF(nullstring);
   4772     nullstring = NULL;
   4773 }
   4774 
   4775 void _Py_ReleaseInternedStrings(void)
   4776 {
   4777     PyObject *keys;
   4778     PyStringObject *s;
   4779     Py_ssize_t i, n;
   4780     Py_ssize_t immortal_size = 0, mortal_size = 0;
   4781 
   4782     if (interned == NULL || !PyDict_Check(interned))
   4783         return;
   4784     keys = PyDict_Keys(interned);
   4785     if (keys == NULL || !PyList_Check(keys)) {
   4786         PyErr_Clear();
   4787         return;
   4788     }
   4789 
   4790     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
   4791        detector, interned strings are not forcibly deallocated; rather, we
   4792        give them their stolen references back, and then clear and DECREF
   4793        the interned dict. */
   4794 
   4795     n = PyList_GET_SIZE(keys);
   4796     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
   4797         n);
   4798     for (i = 0; i < n; i++) {
   4799         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
   4800         switch (s->ob_sstate) {
   4801         case SSTATE_NOT_INTERNED:
   4802             /* XXX Shouldn't happen */
   4803             break;
   4804         case SSTATE_INTERNED_IMMORTAL:
   4805             Py_REFCNT(s) += 1;
   4806             immortal_size += Py_SIZE(s);
   4807             break;
   4808         case SSTATE_INTERNED_MORTAL:
   4809             Py_REFCNT(s) += 2;
   4810             mortal_size += Py_SIZE(s);
   4811             break;
   4812         default:
   4813             Py_FatalError("Inconsistent interned string state.");
   4814         }
   4815         s->ob_sstate = SSTATE_NOT_INTERNED;
   4816     }
   4817     fprintf(stderr, "total size of all interned strings: "
   4818                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
   4819                     "mortal/immortal\n", mortal_size, immortal_size);
   4820     Py_DECREF(keys);
   4821     PyDict_Clear(interned);
   4822     Py_DECREF(interned);
   4823     interned = NULL;
   4824 }
   4825