Home | History | Annotate | Download | only in Objects
      1 /* String (str/bytes) object implementation */
      2 
      3 #define PY_SSIZE_T_CLEAN
      4 
      5 #include "Python.h"
      6 #include <ctype.h>
      7 #include <stddef.h>
      8 
      9 #ifdef COUNT_ALLOCS
     10 Py_ssize_t null_strings, one_strings;
     11 #endif
     12 
     13 static PyStringObject *characters[UCHAR_MAX + 1];
     14 static PyStringObject *nullstring;
     15 
     16 /* This dictionary holds all interned strings.  Note that references to
     17    strings in this dictionary are *not* counted in the string's ob_refcnt.
     18    When the interned string reaches a refcnt of 0 the string deallocation
     19    function will delete the reference from this dictionary.
     20 
     21    Another way to look at this is that to say that the actual reference
     22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
     23 */
     24 static PyObject *interned;
     25 
     26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
     27    for a string of length n should request PyStringObject_SIZE + n bytes.
     28 
     29    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
     30    3 bytes per string allocation on a typical system.
     31 */
     32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
     33 
/*
   For PyString_FromString(), the parameter `str' points to a null-terminated
   string; `size' below refers to its length as reported by strlen().

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
     56 PyObject *
     57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
     58 {
     59     register PyStringObject *op;
     60     if (size < 0) {
     61         PyErr_SetString(PyExc_SystemError,
     62             "Negative size passed to PyString_FromStringAndSize");
     63         return NULL;
     64     }
     65     if (size == 0 && (op = nullstring) != NULL) {
     66 #ifdef COUNT_ALLOCS
     67         null_strings++;
     68 #endif
     69         Py_INCREF(op);
     70         return (PyObject *)op;
     71     }
     72     if (size == 1 && str != NULL &&
     73         (op = characters[*str & UCHAR_MAX]) != NULL)
     74     {
     75 #ifdef COUNT_ALLOCS
     76         one_strings++;
     77 #endif
     78         Py_INCREF(op);
     79         return (PyObject *)op;
     80     }
     81 
     82     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
     83         PyErr_SetString(PyExc_OverflowError, "string is too large");
     84         return NULL;
     85     }
     86 
     87     /* Inline PyObject_NewVar */
     88     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
     89     if (op == NULL)
     90         return PyErr_NoMemory();
     91     PyObject_INIT_VAR(op, &PyString_Type, size);
     92     op->ob_shash = -1;
     93     op->ob_sstate = SSTATE_NOT_INTERNED;
     94     if (str != NULL)
     95         Py_MEMCPY(op->ob_sval, str, size);
     96     op->ob_sval[size] = '\0';
     97     /* share short strings */
     98     if (size == 0) {
     99         PyObject *t = (PyObject *)op;
    100         PyString_InternInPlace(&t);
    101         op = (PyStringObject *)t;
    102         nullstring = op;
    103         Py_INCREF(op);
    104     } else if (size == 1 && str != NULL) {
    105         PyObject *t = (PyObject *)op;
    106         PyString_InternInPlace(&t);
    107         op = (PyStringObject *)t;
    108         characters[*str & UCHAR_MAX] = op;
    109         Py_INCREF(op);
    110     }
    111     return (PyObject *) op;
    112 }
    113 
    114 PyObject *
    115 PyString_FromString(const char *str)
    116 {
    117     register size_t size;
    118     register PyStringObject *op;
    119 
    120     assert(str != NULL);
    121     size = strlen(str);
    122     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
    123         PyErr_SetString(PyExc_OverflowError,
    124             "string is too long for a Python string");
    125         return NULL;
    126     }
    127     if (size == 0 && (op = nullstring) != NULL) {
    128 #ifdef COUNT_ALLOCS
    129         null_strings++;
    130 #endif
    131         Py_INCREF(op);
    132         return (PyObject *)op;
    133     }
    134     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
    135 #ifdef COUNT_ALLOCS
    136         one_strings++;
    137 #endif
    138         Py_INCREF(op);
    139         return (PyObject *)op;
    140     }
    141 
    142     /* Inline PyObject_NewVar */
    143     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
    144     if (op == NULL)
    145         return PyErr_NoMemory();
    146     PyObject_INIT_VAR(op, &PyString_Type, size);
    147     op->ob_shash = -1;
    148     op->ob_sstate = SSTATE_NOT_INTERNED;
    149     Py_MEMCPY(op->ob_sval, str, size+1);
    150     /* share short strings */
    151     if (size == 0) {
    152         PyObject *t = (PyObject *)op;
    153         PyString_InternInPlace(&t);
    154         op = (PyStringObject *)t;
    155         nullstring = op;
    156         Py_INCREF(op);
    157     } else if (size == 1) {
    158         PyObject *t = (PyObject *)op;
    159         PyString_InternInPlace(&t);
    160         op = (PyStringObject *)t;
    161         characters[*str & UCHAR_MAX] = op;
    162         Py_INCREF(op);
    163     }
    164     return (PyObject *) op;
    165 }
    166 
    167 PyObject *
    168 PyString_FromFormatV(const char *format, va_list vargs)
    169 {
    170     va_list count;
    171     Py_ssize_t n = 0;
    172     const char* f;
    173     char *s;
    174     PyObject* string;
    175 
    176 #ifdef VA_LIST_IS_ARRAY
    177     Py_MEMCPY(count, vargs, sizeof(va_list));
    178 #else
    179 #ifdef  __va_copy
    180     __va_copy(count, vargs);
    181 #else
    182     count = vargs;
    183 #endif
    184 #endif
    185     /* step 1: figure out how large a buffer we need */
    186     for (f = format; *f; f++) {
    187         if (*f == '%') {
    188 #ifdef HAVE_LONG_LONG
    189             int longlongflag = 0;
    190 #endif
    191             const char* p = f;
    192             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
    193                 ;
    194 
    195             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
    196              * they don't affect the amount of space we reserve.
    197              */
    198             if (*f == 'l') {
    199                 if (f[1] == 'd' || f[1] == 'u') {
    200                     ++f;
    201                 }
    202 #ifdef HAVE_LONG_LONG
    203                 else if (f[1] == 'l' &&
    204                          (f[2] == 'd' || f[2] == 'u')) {
    205                     longlongflag = 1;
    206                     f += 2;
    207                 }
    208 #endif
    209             }
    210             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
    211                 ++f;
    212             }
    213 
    214             switch (*f) {
    215             case 'c':
    216                 (void)va_arg(count, int);
    217                 /* fall through... */
    218             case '%':
    219                 n++;
    220                 break;
    221             case 'd': case 'u': case 'i': case 'x':
    222                 (void) va_arg(count, int);
    223 #ifdef HAVE_LONG_LONG
    224                 /* Need at most
    225                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
    226                    plus 1 for the sign.  53/22 is an upper
    227                    bound for log10(256). */
    228                 if (longlongflag)
    229                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
    230                 else
    231 #endif
    232                     /* 20 bytes is enough to hold a 64-bit
    233                        integer.  Decimal takes the most
    234                        space.  This isn't enough for
    235                        octal. */
    236                     n += 20;
    237 
    238                 break;
    239             case 's':
    240                 s = va_arg(count, char*);
    241                 n += strlen(s);
    242                 break;
    243             case 'p':
    244                 (void) va_arg(count, int);
    245                 /* maximum 64-bit pointer representation:
    246                  * 0xffffffffffffffff
    247                  * so 19 characters is enough.
    248                  * XXX I count 18 -- what's the extra for?
    249                  */
    250                 n += 19;
    251                 break;
    252             default:
    253                 /* if we stumble upon an unknown
    254                    formatting code, copy the rest of
    255                    the format string to the output
    256                    string. (we cannot just skip the
    257                    code, since there's no way to know
    258                    what's in the argument list) */
    259                 n += strlen(p);
    260                 goto expand;
    261             }
    262         } else
    263             n++;
    264     }
    265  expand:
    266     /* step 2: fill the buffer */
    267     /* Since we've analyzed how much space we need for the worst case,
    268        use sprintf directly instead of the slower PyOS_snprintf. */
    269     string = PyString_FromStringAndSize(NULL, n);
    270     if (!string)
    271         return NULL;
    272 
    273     s = PyString_AsString(string);
    274 
    275     for (f = format; *f; f++) {
    276         if (*f == '%') {
    277             const char* p = f++;
    278             Py_ssize_t i;
    279             int longflag = 0;
    280 #ifdef HAVE_LONG_LONG
    281             int longlongflag = 0;
    282 #endif
    283             int size_tflag = 0;
    284             /* parse the width.precision part (we're only
    285                interested in the precision value, if any) */
    286             n = 0;
    287             while (isdigit(Py_CHARMASK(*f)))
    288                 n = (n*10) + *f++ - '0';
    289             if (*f == '.') {
    290                 f++;
    291                 n = 0;
    292                 while (isdigit(Py_CHARMASK(*f)))
    293                     n = (n*10) + *f++ - '0';
    294             }
    295             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
    296                 f++;
    297             /* Handle %ld, %lu, %lld and %llu. */
    298             if (*f == 'l') {
    299                 if (f[1] == 'd' || f[1] == 'u') {
    300                     longflag = 1;
    301                     ++f;
    302                 }
    303 #ifdef HAVE_LONG_LONG
    304                 else if (f[1] == 'l' &&
    305                          (f[2] == 'd' || f[2] == 'u')) {
    306                     longlongflag = 1;
    307                     f += 2;
    308                 }
    309 #endif
    310             }
    311             /* handle the size_t flag. */
    312             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
    313                 size_tflag = 1;
    314                 ++f;
    315             }
    316 
    317             switch (*f) {
    318             case 'c':
    319                 *s++ = va_arg(vargs, int);
    320                 break;
    321             case 'd':
    322                 if (longflag)
    323                     sprintf(s, "%ld", va_arg(vargs, long));
    324 #ifdef HAVE_LONG_LONG
    325                 else if (longlongflag)
    326                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
    327                         va_arg(vargs, PY_LONG_LONG));
    328 #endif
    329                 else if (size_tflag)
    330                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
    331                         va_arg(vargs, Py_ssize_t));
    332                 else
    333                     sprintf(s, "%d", va_arg(vargs, int));
    334                 s += strlen(s);
    335                 break;
    336             case 'u':
    337                 if (longflag)
    338                     sprintf(s, "%lu",
    339                         va_arg(vargs, unsigned long));
    340 #ifdef HAVE_LONG_LONG
    341                 else if (longlongflag)
    342                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
    343                         va_arg(vargs, PY_LONG_LONG));
    344 #endif
    345                 else if (size_tflag)
    346                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
    347                         va_arg(vargs, size_t));
    348                 else
    349                     sprintf(s, "%u",
    350                         va_arg(vargs, unsigned int));
    351                 s += strlen(s);
    352                 break;
    353             case 'i':
    354                 sprintf(s, "%i", va_arg(vargs, int));
    355                 s += strlen(s);
    356                 break;
    357             case 'x':
    358                 sprintf(s, "%x", va_arg(vargs, int));
    359                 s += strlen(s);
    360                 break;
    361             case 's':
    362                 p = va_arg(vargs, char*);
    363                 i = strlen(p);
    364                 if (n > 0 && i > n)
    365                     i = n;
    366                 Py_MEMCPY(s, p, i);
    367                 s += i;
    368                 break;
    369             case 'p':
    370                 sprintf(s, "%p", va_arg(vargs, void*));
    371                 /* %p is ill-defined:  ensure leading 0x. */
    372                 if (s[1] == 'X')
    373                     s[1] = 'x';
    374                 else if (s[1] != 'x') {
    375                     memmove(s+2, s, strlen(s)+1);
    376                     s[0] = '0';
    377                     s[1] = 'x';
    378                 }
    379                 s += strlen(s);
    380                 break;
    381             case '%':
    382                 *s++ = '%';
    383                 break;
    384             default:
    385                 strcpy(s, p);
    386                 s += strlen(s);
    387                 goto end;
    388             }
    389         } else
    390             *s++ = *f;
    391     }
    392 
    393  end:
    394     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
    395         return NULL;
    396     return string;
    397 }
    398 
    399 PyObject *
    400 PyString_FromFormat(const char *format, ...)
    401 {
    402     PyObject* ret;
    403     va_list vargs;
    404 
    405 #ifdef HAVE_STDARG_PROTOTYPES
    406     va_start(vargs, format);
    407 #else
    408     va_start(vargs);
    409 #endif
    410     ret = PyString_FromFormatV(format, vargs);
    411     va_end(vargs);
    412     return ret;
    413 }
    414 
    415 
    416 PyObject *PyString_Decode(const char *s,
    417                           Py_ssize_t size,
    418                           const char *encoding,
    419                           const char *errors)
    420 {
    421     PyObject *v, *str;
    422 
    423     str = PyString_FromStringAndSize(s, size);
    424     if (str == NULL)
    425         return NULL;
    426     v = PyString_AsDecodedString(str, encoding, errors);
    427     Py_DECREF(str);
    428     return v;
    429 }
    430 
    431 PyObject *PyString_AsDecodedObject(PyObject *str,
    432                                    const char *encoding,
    433                                    const char *errors)
    434 {
    435     PyObject *v;
    436 
    437     if (!PyString_Check(str)) {
    438         PyErr_BadArgument();
    439         goto onError;
    440     }
    441 
    442     if (encoding == NULL) {
    443 #ifdef Py_USING_UNICODE
    444         encoding = PyUnicode_GetDefaultEncoding();
    445 #else
    446         PyErr_SetString(PyExc_ValueError, "no encoding specified");
    447         goto onError;
    448 #endif
    449     }
    450 
    451     /* Decode via the codec registry */
    452     v = PyCodec_Decode(str, encoding, errors);
    453     if (v == NULL)
    454         goto onError;
    455 
    456     return v;
    457 
    458  onError:
    459     return NULL;
    460 }
    461 
    462 PyObject *PyString_AsDecodedString(PyObject *str,
    463                                    const char *encoding,
    464                                    const char *errors)
    465 {
    466     PyObject *v;
    467 
    468     v = PyString_AsDecodedObject(str, encoding, errors);
    469     if (v == NULL)
    470         goto onError;
    471 
    472 #ifdef Py_USING_UNICODE
    473     /* Convert Unicode to a string using the default encoding */
    474     if (PyUnicode_Check(v)) {
    475         PyObject *temp = v;
    476         v = PyUnicode_AsEncodedString(v, NULL, NULL);
    477         Py_DECREF(temp);
    478         if (v == NULL)
    479             goto onError;
    480     }
    481 #endif
    482     if (!PyString_Check(v)) {
    483         PyErr_Format(PyExc_TypeError,
    484                      "decoder did not return a string object (type=%.400s)",
    485                      Py_TYPE(v)->tp_name);
    486         Py_DECREF(v);
    487         goto onError;
    488     }
    489 
    490     return v;
    491 
    492  onError:
    493     return NULL;
    494 }
    495 
    496 PyObject *PyString_Encode(const char *s,
    497                           Py_ssize_t size,
    498                           const char *encoding,
    499                           const char *errors)
    500 {
    501     PyObject *v, *str;
    502 
    503     str = PyString_FromStringAndSize(s, size);
    504     if (str == NULL)
    505         return NULL;
    506     v = PyString_AsEncodedString(str, encoding, errors);
    507     Py_DECREF(str);
    508     return v;
    509 }
    510 
    511 PyObject *PyString_AsEncodedObject(PyObject *str,
    512                                    const char *encoding,
    513                                    const char *errors)
    514 {
    515     PyObject *v;
    516 
    517     if (!PyString_Check(str)) {
    518         PyErr_BadArgument();
    519         goto onError;
    520     }
    521 
    522     if (encoding == NULL) {
    523 #ifdef Py_USING_UNICODE
    524         encoding = PyUnicode_GetDefaultEncoding();
    525 #else
    526         PyErr_SetString(PyExc_ValueError, "no encoding specified");
    527         goto onError;
    528 #endif
    529     }
    530 
    531     /* Encode via the codec registry */
    532     v = PyCodec_Encode(str, encoding, errors);
    533     if (v == NULL)
    534         goto onError;
    535 
    536     return v;
    537 
    538  onError:
    539     return NULL;
    540 }
    541 
    542 PyObject *PyString_AsEncodedString(PyObject *str,
    543                                    const char *encoding,
    544                                    const char *errors)
    545 {
    546     PyObject *v;
    547 
    548     v = PyString_AsEncodedObject(str, encoding, errors);
    549     if (v == NULL)
    550         goto onError;
    551 
    552 #ifdef Py_USING_UNICODE
    553     /* Convert Unicode to a string using the default encoding */
    554     if (PyUnicode_Check(v)) {
    555         PyObject *temp = v;
    556         v = PyUnicode_AsEncodedString(v, NULL, NULL);
    557         Py_DECREF(temp);
    558         if (v == NULL)
    559             goto onError;
    560     }
    561 #endif
    562     if (!PyString_Check(v)) {
    563         PyErr_Format(PyExc_TypeError,
    564                      "encoder did not return a string object (type=%.400s)",
    565                      Py_TYPE(v)->tp_name);
    566         Py_DECREF(v);
    567         goto onError;
    568     }
    569 
    570     return v;
    571 
    572  onError:
    573     return NULL;
    574 }
    575 
    576 static void
    577 string_dealloc(PyObject *op)
    578 {
    579     switch (PyString_CHECK_INTERNED(op)) {
    580         case SSTATE_NOT_INTERNED:
    581             break;
    582 
    583         case SSTATE_INTERNED_MORTAL:
    584             /* revive dead object temporarily for DelItem */
    585             Py_REFCNT(op) = 3;
    586             if (PyDict_DelItem(interned, op) != 0)
    587                 Py_FatalError(
    588                     "deletion of interned string failed");
    589             break;
    590 
    591         case SSTATE_INTERNED_IMMORTAL:
    592             Py_FatalError("Immortal interned string died.");
    593 
    594         default:
    595             Py_FatalError("Inconsistent interned string state.");
    596     }
    597     Py_TYPE(op)->tp_free(op);
    598 }
    599 
    600 /* Unescape a backslash-escaped string. If unicode is non-zero,
    601    the string is a u-literal. If recode_encoding is non-zero,
    602    the string is UTF-8 encoded and should be re-encoded in the
    603    specified encoding.  */
    604 
    605 PyObject *PyString_DecodeEscape(const char *s,
    606                                 Py_ssize_t len,
    607                                 const char *errors,
    608                                 Py_ssize_t unicode,
    609                                 const char *recode_encoding)
    610 {
    611     int c;
    612     char *p, *buf;
    613     const char *end;
    614     PyObject *v;
    615     Py_ssize_t newlen = recode_encoding ? 4*len:len;
    616     v = PyString_FromStringAndSize((char *)NULL, newlen);
    617     if (v == NULL)
    618         return NULL;
    619     p = buf = PyString_AsString(v);
    620     end = s + len;
    621     while (s < end) {
    622         if (*s != '\\') {
    623           non_esc:
    624 #ifdef Py_USING_UNICODE
    625             if (recode_encoding && (*s & 0x80)) {
    626                 PyObject *u, *w;
    627                 char *r;
    628                 const char* t;
    629                 Py_ssize_t rn;
    630                 t = s;
    631                 /* Decode non-ASCII bytes as UTF-8. */
    632                 while (t < end && (*t & 0x80)) t++;
    633                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
    634                 if(!u) goto failed;
    635 
    636                 /* Recode them in target encoding. */
    637                 w = PyUnicode_AsEncodedString(
    638                     u, recode_encoding, errors);
    639                 Py_DECREF(u);
    640                 if (!w)                 goto failed;
    641 
    642                 /* Append bytes to output buffer. */
    643                 assert(PyString_Check(w));
    644                 r = PyString_AS_STRING(w);
    645                 rn = PyString_GET_SIZE(w);
    646                 Py_MEMCPY(p, r, rn);
    647                 p += rn;
    648                 Py_DECREF(w);
    649                 s = t;
    650             } else {
    651                 *p++ = *s++;
    652             }
    653 #else
    654             *p++ = *s++;
    655 #endif
    656             continue;
    657         }
    658         s++;
    659         if (s==end) {
    660             PyErr_SetString(PyExc_ValueError,
    661                             "Trailing \\ in string");
    662             goto failed;
    663         }
    664         switch (*s++) {
    665         /* XXX This assumes ASCII! */
    666         case '\n': break;
    667         case '\\': *p++ = '\\'; break;
    668         case '\'': *p++ = '\''; break;
    669         case '\"': *p++ = '\"'; break;
    670         case 'b': *p++ = '\b'; break;
    671         case 'f': *p++ = '\014'; break; /* FF */
    672         case 't': *p++ = '\t'; break;
    673         case 'n': *p++ = '\n'; break;
    674         case 'r': *p++ = '\r'; break;
    675         case 'v': *p++ = '\013'; break; /* VT */
    676         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
    677         case '0': case '1': case '2': case '3':
    678         case '4': case '5': case '6': case '7':
    679             c = s[-1] - '0';
    680             if (s < end && '0' <= *s && *s <= '7') {
    681                 c = (c<<3) + *s++ - '0';
    682                 if (s < end && '0' <= *s && *s <= '7')
    683                     c = (c<<3) + *s++ - '0';
    684             }
    685             *p++ = c;
    686             break;
    687         case 'x':
    688             if (s+1 < end &&
    689                 isxdigit(Py_CHARMASK(s[0])) &&
    690                 isxdigit(Py_CHARMASK(s[1])))
    691             {
    692                 unsigned int x = 0;
    693                 c = Py_CHARMASK(*s);
    694                 s++;
    695                 if (isdigit(c))
    696                     x = c - '0';
    697                 else if (islower(c))
    698                     x = 10 + c - 'a';
    699                 else
    700                     x = 10 + c - 'A';
    701                 x = x << 4;
    702                 c = Py_CHARMASK(*s);
    703                 s++;
    704                 if (isdigit(c))
    705                     x += c - '0';
    706                 else if (islower(c))
    707                     x += 10 + c - 'a';
    708                 else
    709                     x += 10 + c - 'A';
    710                 *p++ = x;
    711                 break;
    712             }
    713             if (!errors || strcmp(errors, "strict") == 0) {
    714                 PyErr_SetString(PyExc_ValueError,
    715                                 "invalid \\x escape");
    716                 goto failed;
    717             }
    718             if (strcmp(errors, "replace") == 0) {
    719                 *p++ = '?';
    720             } else if (strcmp(errors, "ignore") == 0)
    721                 /* do nothing */;
    722             else {
    723                 PyErr_Format(PyExc_ValueError,
    724                              "decoding error; "
    725                              "unknown error handling code: %.400s",
    726                              errors);
    727                 goto failed;
    728             }
    729             /* skip \x */
    730             if (s < end && isxdigit(Py_CHARMASK(s[0])))
    731                 s++; /* and a hexdigit */
    732             break;
    733 #ifndef Py_USING_UNICODE
    734         case 'u':
    735         case 'U':
    736         case 'N':
    737             if (unicode) {
    738                 PyErr_SetString(PyExc_ValueError,
    739                           "Unicode escapes not legal "
    740                           "when Unicode disabled");
    741                 goto failed;
    742             }
    743 #endif
    744         default:
    745             *p++ = '\\';
    746             s--;
    747             goto non_esc; /* an arbitrary number of unescaped
    748                              UTF-8 bytes may follow. */
    749         }
    750     }
    751     if (p-buf < newlen)
    752         _PyString_Resize(&v, p - buf); /* v is cleared on error */
    753     return v;
    754   failed:
    755     Py_DECREF(v);
    756     return NULL;
    757 }
    758 
    759 /* -------------------------------------------------------------------- */
    760 /* object api */
    761 
    762 static Py_ssize_t
    763 string_getsize(register PyObject *op)
    764 {
    765     char *s;
    766     Py_ssize_t len;
    767     if (PyString_AsStringAndSize(op, &s, &len))
    768         return -1;
    769     return len;
    770 }
    771 
    772 static /*const*/ char *
    773 string_getbuffer(register PyObject *op)
    774 {
    775     char *s;
    776     Py_ssize_t len;
    777     if (PyString_AsStringAndSize(op, &s, &len))
    778         return NULL;
    779     return s;
    780 }
    781 
    782 Py_ssize_t
    783 PyString_Size(register PyObject *op)
    784 {
    785     if (!PyString_Check(op))
    786         return string_getsize(op);
    787     return Py_SIZE(op);
    788 }
    789 
    790 /*const*/ char *
    791 PyString_AsString(register PyObject *op)
    792 {
    793     if (!PyString_Check(op))
    794         return string_getbuffer(op);
    795     return ((PyStringObject *)op) -> ob_sval;
    796 }
    797 
    798 int
    799 PyString_AsStringAndSize(register PyObject *obj,
    800                          register char **s,
    801                          register Py_ssize_t *len)
    802 {
    803     if (s == NULL) {
    804         PyErr_BadInternalCall();
    805         return -1;
    806     }
    807 
    808     if (!PyString_Check(obj)) {
    809 #ifdef Py_USING_UNICODE
    810         if (PyUnicode_Check(obj)) {
    811             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
    812             if (obj == NULL)
    813                 return -1;
    814         }
    815         else
    816 #endif
    817         {
    818             PyErr_Format(PyExc_TypeError,
    819                          "expected string or Unicode object, "
    820                          "%.200s found", Py_TYPE(obj)->tp_name);
    821             return -1;
    822         }
    823     }
    824 
    825     *s = PyString_AS_STRING(obj);
    826     if (len != NULL)
    827         *len = PyString_GET_SIZE(obj);
    828     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
    829         PyErr_SetString(PyExc_TypeError,
    830                         "expected string without null bytes");
    831         return -1;
    832     }
    833     return 0;
    834 }
    835 
    836 /* -------------------------------------------------------------------- */
    837 /* Methods */
    838 
    839 #include "stringlib/stringdefs.h"
    840 #include "stringlib/fastsearch.h"
    841 
    842 #include "stringlib/count.h"
    843 #include "stringlib/find.h"
    844 #include "stringlib/partition.h"
    845 #include "stringlib/split.h"
    846 
    847 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
    848 #include "stringlib/localeutil.h"
    849 
    850 
    851 
    852 static int
    853 string_print(PyStringObject *op, FILE *fp, int flags)
    854 {
    855     Py_ssize_t i, str_len;
    856     char c;
    857     int quote;
    858 
    859     /* XXX Ought to check for interrupts when writing long strings */
    860     if (! PyString_CheckExact(op)) {
    861         int ret;
    862         /* A str subclass may have its own __str__ method. */
    863         op = (PyStringObject *) PyObject_Str((PyObject *)op);
    864         if (op == NULL)
    865             return -1;
    866         ret = string_print(op, fp, flags);
    867         Py_DECREF(op);
    868         return ret;
    869     }
    870     if (flags & Py_PRINT_RAW) {
    871         char *data = op->ob_sval;
    872         Py_ssize_t size = Py_SIZE(op);
    873         Py_BEGIN_ALLOW_THREADS
    874         while (size > INT_MAX) {
    875             /* Very long strings cannot be written atomically.
    876              * But don't write exactly INT_MAX bytes at a time
    877              * to avoid memory aligment issues.
    878              */
    879             const int chunk_size = INT_MAX & ~0x3FFF;
    880             fwrite(data, 1, chunk_size, fp);
    881             data += chunk_size;
    882             size -= chunk_size;
    883         }
    884 #ifdef __VMS
    885         if (size) fwrite(data, (size_t)size, 1, fp);
    886 #else
    887         fwrite(data, 1, (size_t)size, fp);
    888 #endif
    889         Py_END_ALLOW_THREADS
    890         return 0;
    891     }
    892 
    893     /* figure out which quote to use; single is preferred */
    894     quote = '\'';
    895     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
    896         !memchr(op->ob_sval, '"', Py_SIZE(op)))
    897         quote = '"';
    898 
    899     str_len = Py_SIZE(op);
    900     Py_BEGIN_ALLOW_THREADS
    901     fputc(quote, fp);
    902     for (i = 0; i < str_len; i++) {
    903         /* Since strings are immutable and the caller should have a
    904         reference, accessing the interal buffer should not be an issue
    905         with the GIL released. */
    906         c = op->ob_sval[i];
    907         if (c == quote || c == '\\')
    908             fprintf(fp, "\\%c", c);
    909         else if (c == '\t')
    910             fprintf(fp, "\\t");
    911         else if (c == '\n')
    912             fprintf(fp, "\\n");
    913         else if (c == '\r')
    914             fprintf(fp, "\\r");
    915         else if (c < ' ' || c >= 0x7f)
    916             fprintf(fp, "\\x%02x", c & 0xff);
    917         else
    918             fputc(c, fp);
    919     }
    920     fputc(quote, fp);
    921     Py_END_ALLOW_THREADS
    922     return 0;
    923 }
    924 
    925 PyObject *
    926 PyString_Repr(PyObject *obj, int smartquotes)
    927 {
    928     register PyStringObject* op = (PyStringObject*) obj;
    929     size_t newsize;
    930     PyObject *v;
    931     if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
    932         PyErr_SetString(PyExc_OverflowError,
    933             "string is too large to make repr");
    934         return NULL;
    935     }
    936     newsize = 2 + 4*Py_SIZE(op);
    937     v = PyString_FromStringAndSize((char *)NULL, newsize);
    938     if (v == NULL) {
    939         return NULL;
    940     }
    941     else {
    942         register Py_ssize_t i;
    943         register char c;
    944         register char *p;
    945         int quote;
    946 
    947         /* figure out which quote to use; single is preferred */
    948         quote = '\'';
    949         if (smartquotes &&
    950             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
    951             !memchr(op->ob_sval, '"', Py_SIZE(op)))
    952             quote = '"';
    953 
    954         p = PyString_AS_STRING(v);
    955         *p++ = quote;
    956         for (i = 0; i < Py_SIZE(op); i++) {
    957             /* There's at least enough room for a hex escape
    958                and a closing quote. */
    959             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
    960             c = op->ob_sval[i];
    961             if (c == quote || c == '\\')
    962                 *p++ = '\\', *p++ = c;
    963             else if (c == '\t')
    964                 *p++ = '\\', *p++ = 't';
    965             else if (c == '\n')
    966                 *p++ = '\\', *p++ = 'n';
    967             else if (c == '\r')
    968                 *p++ = '\\', *p++ = 'r';
    969             else if (c < ' ' || c >= 0x7f) {
    970                 /* For performance, we don't want to call
    971                    PyOS_snprintf here (extra layers of
    972                    function call). */
    973                 sprintf(p, "\\x%02x", c & 0xff);
    974                 p += 4;
    975             }
    976             else
    977                 *p++ = c;
    978         }
    979         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
    980         *p++ = quote;
    981         *p = '\0';
    982         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
    983             return NULL;
    984         return v;
    985     }
    986 }
    987 
    988 static PyObject *
    989 string_repr(PyObject *op)
    990 {
    991     return PyString_Repr(op, 1);
    992 }
    993 
    994 static PyObject *
    995 string_str(PyObject *s)
    996 {
    997     assert(PyString_Check(s));
    998     if (PyString_CheckExact(s)) {
    999         Py_INCREF(s);
   1000         return s;
   1001     }
   1002     else {
   1003         /* Subtype -- return genuine string with the same value. */
   1004         PyStringObject *t = (PyStringObject *) s;
   1005         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
   1006     }
   1007 }
   1008 
   1009 static Py_ssize_t
   1010 string_length(PyStringObject *a)
   1011 {
   1012     return Py_SIZE(a);
   1013 }
   1014 
   1015 static PyObject *
   1016 string_concat(register PyStringObject *a, register PyObject *bb)
   1017 {
   1018     register Py_ssize_t size;
   1019     register PyStringObject *op;
   1020     if (!PyString_Check(bb)) {
   1021 #ifdef Py_USING_UNICODE
   1022         if (PyUnicode_Check(bb))
   1023             return PyUnicode_Concat((PyObject *)a, bb);
   1024 #endif
   1025         if (PyByteArray_Check(bb))
   1026             return PyByteArray_Concat((PyObject *)a, bb);
   1027         PyErr_Format(PyExc_TypeError,
   1028                      "cannot concatenate 'str' and '%.200s' objects",
   1029                      Py_TYPE(bb)->tp_name);
   1030         return NULL;
   1031     }
   1032 #define b ((PyStringObject *)bb)
   1033     /* Optimize cases with empty left or right operand */
   1034     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
   1035         PyString_CheckExact(a) && PyString_CheckExact(b)) {
   1036         if (Py_SIZE(a) == 0) {
   1037             Py_INCREF(bb);
   1038             return bb;
   1039         }
   1040         Py_INCREF(a);
   1041         return (PyObject *)a;
   1042     }
   1043     size = Py_SIZE(a) + Py_SIZE(b);
   1044     /* Check that string sizes are not negative, to prevent an
   1045        overflow in cases where we are passed incorrectly-created
   1046        strings with negative lengths (due to a bug in other code).
   1047     */
   1048     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
   1049         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
   1050         PyErr_SetString(PyExc_OverflowError,
   1051                         "strings are too large to concat");
   1052         return NULL;
   1053     }
   1054 
   1055     /* Inline PyObject_NewVar */
   1056     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
   1057         PyErr_SetString(PyExc_OverflowError,
   1058                         "strings are too large to concat");
   1059         return NULL;
   1060     }
   1061     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
   1062     if (op == NULL)
   1063         return PyErr_NoMemory();
   1064     PyObject_INIT_VAR(op, &PyString_Type, size);
   1065     op->ob_shash = -1;
   1066     op->ob_sstate = SSTATE_NOT_INTERNED;
   1067     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
   1068     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
   1069     op->ob_sval[size] = '\0';
   1070     return (PyObject *) op;
   1071 #undef b
   1072 }
   1073 
   1074 static PyObject *
   1075 string_repeat(register PyStringObject *a, register Py_ssize_t n)
   1076 {
   1077     register Py_ssize_t i;
   1078     register Py_ssize_t j;
   1079     register Py_ssize_t size;
   1080     register PyStringObject *op;
   1081     size_t nbytes;
   1082     if (n < 0)
   1083         n = 0;
   1084     /* watch out for overflows:  the size can overflow int,
   1085      * and the # of bytes needed can overflow size_t
   1086      */
   1087     size = Py_SIZE(a) * n;
   1088     if (n && size / n != Py_SIZE(a)) {
   1089         PyErr_SetString(PyExc_OverflowError,
   1090             "repeated string is too long");
   1091         return NULL;
   1092     }
   1093     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
   1094         Py_INCREF(a);
   1095         return (PyObject *)a;
   1096     }
   1097     nbytes = (size_t)size;
   1098     if (nbytes + PyStringObject_SIZE <= nbytes) {
   1099         PyErr_SetString(PyExc_OverflowError,
   1100             "repeated string is too long");
   1101         return NULL;
   1102     }
   1103     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
   1104     if (op == NULL)
   1105         return PyErr_NoMemory();
   1106     PyObject_INIT_VAR(op, &PyString_Type, size);
   1107     op->ob_shash = -1;
   1108     op->ob_sstate = SSTATE_NOT_INTERNED;
   1109     op->ob_sval[size] = '\0';
   1110     if (Py_SIZE(a) == 1 && n > 0) {
   1111         memset(op->ob_sval, a->ob_sval[0] , n);
   1112         return (PyObject *) op;
   1113     }
   1114     i = 0;
   1115     if (i < size) {
   1116         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
   1117         i = Py_SIZE(a);
   1118     }
   1119     while (i < size) {
   1120         j = (i <= size-i)  ?  i  :  size-i;
   1121         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
   1122         i += j;
   1123     }
   1124     return (PyObject *) op;
   1125 }
   1126 
   1127 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
   1128 
   1129 static PyObject *
   1130 string_slice(register PyStringObject *a, register Py_ssize_t i,
   1131              register Py_ssize_t j)
   1132      /* j -- may be negative! */
   1133 {
   1134     if (i < 0)
   1135         i = 0;
   1136     if (j < 0)
   1137         j = 0; /* Avoid signed/unsigned bug in next line */
   1138     if (j > Py_SIZE(a))
   1139         j = Py_SIZE(a);
   1140     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
   1141         /* It's the same as a */
   1142         Py_INCREF(a);
   1143         return (PyObject *)a;
   1144     }
   1145     if (j < i)
   1146         j = i;
   1147     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
   1148 }
   1149 
   1150 static int
   1151 string_contains(PyObject *str_obj, PyObject *sub_obj)
   1152 {
   1153     if (!PyString_CheckExact(sub_obj)) {
   1154 #ifdef Py_USING_UNICODE
   1155         if (PyUnicode_Check(sub_obj))
   1156             return PyUnicode_Contains(str_obj, sub_obj);
   1157 #endif
   1158         if (!PyString_Check(sub_obj)) {
   1159             PyErr_Format(PyExc_TypeError,
   1160                 "'in <string>' requires string as left operand, "
   1161                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
   1162             return -1;
   1163         }
   1164     }
   1165 
   1166     return stringlib_contains_obj(str_obj, sub_obj);
   1167 }
   1168 
   1169 static PyObject *
   1170 string_item(PyStringObject *a, register Py_ssize_t i)
   1171 {
   1172     char pchar;
   1173     PyObject *v;
   1174     if (i < 0 || i >= Py_SIZE(a)) {
   1175         PyErr_SetString(PyExc_IndexError, "string index out of range");
   1176         return NULL;
   1177     }
   1178     pchar = a->ob_sval[i];
   1179     v = (PyObject *)characters[pchar & UCHAR_MAX];
   1180     if (v == NULL)
   1181         v = PyString_FromStringAndSize(&pchar, 1);
   1182     else {
   1183 #ifdef COUNT_ALLOCS
   1184         one_strings++;
   1185 #endif
   1186         Py_INCREF(v);
   1187     }
   1188     return v;
   1189 }
   1190 
   1191 static PyObject*
   1192 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
   1193 {
   1194     int c;
   1195     Py_ssize_t len_a, len_b;
   1196     Py_ssize_t min_len;
   1197     PyObject *result;
   1198 
   1199     /* Make sure both arguments are strings. */
   1200     if (!(PyString_Check(a) && PyString_Check(b))) {
   1201         result = Py_NotImplemented;
   1202         goto out;
   1203     }
   1204     if (a == b) {
   1205         switch (op) {
   1206         case Py_EQ:case Py_LE:case Py_GE:
   1207             result = Py_True;
   1208             goto out;
   1209         case Py_NE:case Py_LT:case Py_GT:
   1210             result = Py_False;
   1211             goto out;
   1212         }
   1213     }
   1214     if (op == Py_EQ) {
   1215         /* Supporting Py_NE here as well does not save
   1216            much time, since Py_NE is rarely used.  */
   1217         if (Py_SIZE(a) == Py_SIZE(b)
   1218             && (a->ob_sval[0] == b->ob_sval[0]
   1219             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
   1220             result = Py_True;
   1221         } else {
   1222             result = Py_False;
   1223         }
   1224         goto out;
   1225     }
   1226     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
   1227     min_len = (len_a < len_b) ? len_a : len_b;
   1228     if (min_len > 0) {
   1229         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
   1230         if (c==0)
   1231             c = memcmp(a->ob_sval, b->ob_sval, min_len);
   1232     } else
   1233         c = 0;
   1234     if (c == 0)
   1235         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
   1236     switch (op) {
   1237     case Py_LT: c = c <  0; break;
   1238     case Py_LE: c = c <= 0; break;
   1239     case Py_EQ: assert(0);  break; /* unreachable */
   1240     case Py_NE: c = c != 0; break;
   1241     case Py_GT: c = c >  0; break;
   1242     case Py_GE: c = c >= 0; break;
   1243     default:
   1244         result = Py_NotImplemented;
   1245         goto out;
   1246     }
   1247     result = c ? Py_True : Py_False;
   1248   out:
   1249     Py_INCREF(result);
   1250     return result;
   1251 }
   1252 
   1253 int
   1254 _PyString_Eq(PyObject *o1, PyObject *o2)
   1255 {
   1256     PyStringObject *a = (PyStringObject*) o1;
   1257     PyStringObject *b = (PyStringObject*) o2;
   1258     return Py_SIZE(a) == Py_SIZE(b)
   1259       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
   1260 }
   1261 
   1262 static long
   1263 string_hash(PyStringObject *a)
   1264 {
   1265     register Py_ssize_t len;
   1266     register unsigned char *p;
   1267     register long x;
   1268 
   1269 #ifdef Py_DEBUG
   1270     assert(_Py_HashSecret_Initialized);
   1271 #endif
   1272     if (a->ob_shash != -1)
   1273         return a->ob_shash;
   1274     len = Py_SIZE(a);
   1275     /*
   1276       We make the hash of the empty string be 0, rather than using
   1277       (prefix ^ suffix), since this slightly obfuscates the hash secret
   1278     */
   1279     if (len == 0) {
   1280         a->ob_shash = 0;
   1281         return 0;
   1282     }
   1283     p = (unsigned char *) a->ob_sval;
   1284     x = _Py_HashSecret.prefix;
   1285     x ^= *p << 7;
   1286     while (--len >= 0)
   1287         x = (1000003*x) ^ *p++;
   1288     x ^= Py_SIZE(a);
   1289     x ^= _Py_HashSecret.suffix;
   1290     if (x == -1)
   1291         x = -2;
   1292     a->ob_shash = x;
   1293     return x;
   1294 }
   1295 
   1296 static PyObject*
   1297 string_subscript(PyStringObject* self, PyObject* item)
   1298 {
   1299     if (PyIndex_Check(item)) {
   1300         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
   1301         if (i == -1 && PyErr_Occurred())
   1302             return NULL;
   1303         if (i < 0)
   1304             i += PyString_GET_SIZE(self);
   1305         return string_item(self, i);
   1306     }
   1307     else if (PySlice_Check(item)) {
   1308         Py_ssize_t start, stop, step, slicelength, cur, i;
   1309         char* source_buf;
   1310         char* result_buf;
   1311         PyObject* result;
   1312 
   1313         if (PySlice_GetIndicesEx((PySliceObject*)item,
   1314                          PyString_GET_SIZE(self),
   1315                          &start, &stop, &step, &slicelength) < 0) {
   1316             return NULL;
   1317         }
   1318 
   1319         if (slicelength <= 0) {
   1320             return PyString_FromStringAndSize("", 0);
   1321         }
   1322         else if (start == 0 && step == 1 &&
   1323                  slicelength == PyString_GET_SIZE(self) &&
   1324                  PyString_CheckExact(self)) {
   1325             Py_INCREF(self);
   1326             return (PyObject *)self;
   1327         }
   1328         else if (step == 1) {
   1329             return PyString_FromStringAndSize(
   1330                 PyString_AS_STRING(self) + start,
   1331                 slicelength);
   1332         }
   1333         else {
   1334             source_buf = PyString_AsString((PyObject*)self);
   1335             result_buf = (char *)PyMem_Malloc(slicelength);
   1336             if (result_buf == NULL)
   1337                 return PyErr_NoMemory();
   1338 
   1339             for (cur = start, i = 0; i < slicelength;
   1340                  cur += step, i++) {
   1341                 result_buf[i] = source_buf[cur];
   1342             }
   1343 
   1344             result = PyString_FromStringAndSize(result_buf,
   1345                                                 slicelength);
   1346             PyMem_Free(result_buf);
   1347             return result;
   1348         }
   1349     }
   1350     else {
   1351         PyErr_Format(PyExc_TypeError,
   1352                      "string indices must be integers, not %.200s",
   1353                      Py_TYPE(item)->tp_name);
   1354         return NULL;
   1355     }
   1356 }
   1357 
   1358 static Py_ssize_t
   1359 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
   1360 {
   1361     if ( index != 0 ) {
   1362         PyErr_SetString(PyExc_SystemError,
   1363                         "accessing non-existent string segment");
   1364         return -1;
   1365     }
   1366     *ptr = (void *)self->ob_sval;
   1367     return Py_SIZE(self);
   1368 }
   1369 
   1370 static Py_ssize_t
   1371 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
   1372 {
   1373     PyErr_SetString(PyExc_TypeError,
   1374                     "Cannot use string as modifiable buffer");
   1375     return -1;
   1376 }
   1377 
   1378 static Py_ssize_t
   1379 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
   1380 {
   1381     if ( lenp )
   1382         *lenp = Py_SIZE(self);
   1383     return 1;
   1384 }
   1385 
   1386 static Py_ssize_t
   1387 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
   1388 {
   1389     if ( index != 0 ) {
   1390         PyErr_SetString(PyExc_SystemError,
   1391                         "accessing non-existent string segment");
   1392         return -1;
   1393     }
   1394     *ptr = self->ob_sval;
   1395     return Py_SIZE(self);
   1396 }
   1397 
   1398 static int
   1399 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
   1400 {
   1401     return PyBuffer_FillInfo(view, (PyObject*)self,
   1402                              (void *)self->ob_sval, Py_SIZE(self),
   1403                              1, flags);
   1404 }
   1405 
/* Sequence protocol slots for str.  The two assignment slots are left
   empty; no item or slice assignment handler is installed. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
   1416 
/* Mapping protocol slots for str: length and subscripting (integer
   index or slice, see string_subscript).  The third slot is empty. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length, /*mp_length*/
    (binaryfunc)string_subscript, /*mp_subscript*/
    0, /*mp_ass_subscript*/
};
   1422 
/* Buffer protocol slots for str: the four old-style getters followed
   by the new-style getbuffer function.  The last slot is empty. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf, /*bf_getreadbuffer*/
    (writebufferproc)string_buffer_getwritebuf, /*bf_getwritebuffer*/
    (segcountproc)string_buffer_getsegcount, /*bf_getsegcount*/
    (charbufferproc)string_buffer_getcharbuf, /*bf_getcharbuffer*/
    (getbufferproc)string_buffer_getbuffer, /*bf_getbuffer*/
    0, /* XXX bf_releasebuffer left empty */
};
   1431 
   1432 
   1433 
/* Selectors for the three strip variants.  Each value is also the index
   of the matching entry in stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* NOTE(review): these look like argument-parsing format strings (one
   optional object, then ":" and the method name) -- confirm at the
   point of use. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip variant i: the stripformat entry with its
   3-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
   1442 
   1443 PyDoc_STRVAR(split__doc__,
   1444 "S.split([sep [,maxsplit]]) -> list of strings\n\
   1445 \n\
   1446 Return a list of the words in the string S, using sep as the\n\
   1447 delimiter string.  If maxsplit is given, at most maxsplit\n\
   1448 splits are done. If sep is not specified or is None, any\n\
   1449 whitespace string is a separator and empty strings are removed\n\
   1450 from the result.");
   1451 
   1452 static PyObject *
   1453 string_split(PyStringObject *self, PyObject *args)
   1454 {
   1455     Py_ssize_t len = PyString_GET_SIZE(self), n;
   1456     Py_ssize_t maxsplit = -1;
   1457     const char *s = PyString_AS_STRING(self), *sub;
   1458     PyObject *subobj = Py_None;
   1459 
   1460     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
   1461         return NULL;
   1462     if (maxsplit < 0)
   1463         maxsplit = PY_SSIZE_T_MAX;
   1464     if (subobj == Py_None)
   1465         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
   1466     if (PyString_Check(subobj)) {
   1467         sub = PyString_AS_STRING(subobj);
   1468         n = PyString_GET_SIZE(subobj);
   1469     }
   1470 #ifdef Py_USING_UNICODE
   1471     else if (PyUnicode_Check(subobj))
   1472         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
   1473 #endif
   1474     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
   1475         return NULL;
   1476 
   1477     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
   1478 }
   1479 
   1480 PyDoc_STRVAR(partition__doc__,
   1481 "S.partition(sep) -> (head, sep, tail)\n\
   1482 \n\
   1483 Search for the separator sep in S, and return the part before it,\n\
   1484 the separator itself, and the part after it.  If the separator is not\n\
   1485 found, return S and two empty strings.");
   1486 
   1487 static PyObject *
   1488 string_partition(PyStringObject *self, PyObject *sep_obj)
   1489 {
   1490     const char *sep;
   1491     Py_ssize_t sep_len;
   1492 
   1493     if (PyString_Check(sep_obj)) {
   1494         sep = PyString_AS_STRING(sep_obj);
   1495         sep_len = PyString_GET_SIZE(sep_obj);
   1496     }
   1497 #ifdef Py_USING_UNICODE
   1498     else if (PyUnicode_Check(sep_obj))
   1499         return PyUnicode_Partition((PyObject *) self, sep_obj);
   1500 #endif
   1501     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
   1502         return NULL;
   1503 
   1504     return stringlib_partition(
   1505         (PyObject*) self,
   1506         PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1507         sep_obj, sep, sep_len
   1508         );
   1509 }
   1510 
   1511 PyDoc_STRVAR(rpartition__doc__,
   1512 "S.rpartition(sep) -> (head, sep, tail)\n\
   1513 \n\
   1514 Search for the separator sep in S, starting at the end of S, and return\n\
   1515 the part before it, the separator itself, and the part after it.  If the\n\
   1516 separator is not found, return two empty strings and S.");
   1517 
   1518 static PyObject *
   1519 string_rpartition(PyStringObject *self, PyObject *sep_obj)
   1520 {
   1521     const char *sep;
   1522     Py_ssize_t sep_len;
   1523 
   1524     if (PyString_Check(sep_obj)) {
   1525         sep = PyString_AS_STRING(sep_obj);
   1526         sep_len = PyString_GET_SIZE(sep_obj);
   1527     }
   1528 #ifdef Py_USING_UNICODE
   1529     else if (PyUnicode_Check(sep_obj))
   1530         return PyUnicode_RPartition((PyObject *) self, sep_obj);
   1531 #endif
   1532     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
   1533         return NULL;
   1534 
   1535     return stringlib_rpartition(
   1536         (PyObject*) self,
   1537         PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1538         sep_obj, sep, sep_len
   1539         );
   1540 }
   1541 
   1542 PyDoc_STRVAR(rsplit__doc__,
   1543 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
   1544 \n\
   1545 Return a list of the words in the string S, using sep as the\n\
   1546 delimiter string, starting at the end of the string and working\n\
   1547 to the front.  If maxsplit is given, at most maxsplit splits are\n\
   1548 done. If sep is not specified or is None, any whitespace string\n\
   1549 is a separator.");
   1550 
   1551 static PyObject *
   1552 string_rsplit(PyStringObject *self, PyObject *args)
   1553 {
   1554     Py_ssize_t len = PyString_GET_SIZE(self), n;
   1555     Py_ssize_t maxsplit = -1;
   1556     const char *s = PyString_AS_STRING(self), *sub;
   1557     PyObject *subobj = Py_None;
   1558 
   1559     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
   1560         return NULL;
   1561     if (maxsplit < 0)
   1562         maxsplit = PY_SSIZE_T_MAX;
   1563     if (subobj == Py_None)
   1564         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
   1565     if (PyString_Check(subobj)) {
   1566         sub = PyString_AS_STRING(subobj);
   1567         n = PyString_GET_SIZE(subobj);
   1568     }
   1569 #ifdef Py_USING_UNICODE
   1570     else if (PyUnicode_Check(subobj))
   1571         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
   1572 #endif
   1573     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
   1574         return NULL;
   1575 
   1576     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
   1577 }
   1578 
   1579 
   1580 PyDoc_STRVAR(join__doc__,
   1581 "S.join(iterable) -> string\n\
   1582 \n\
   1583 Return a string which is the concatenation of the strings in the\n\
   1584 iterable.  The separator between elements is S.");
   1585 
   1586 static PyObject *
   1587 string_join(PyStringObject *self, PyObject *orig)
   1588 {
   1589     char *sep = PyString_AS_STRING(self);
   1590     const Py_ssize_t seplen = PyString_GET_SIZE(self);
   1591     PyObject *res = NULL;
   1592     char *p;
   1593     Py_ssize_t seqlen = 0;
   1594     size_t sz = 0;
   1595     Py_ssize_t i;
   1596     PyObject *seq, *item;
   1597 
   1598     seq = PySequence_Fast(orig, "can only join an iterable");
   1599     if (seq == NULL) {
   1600         return NULL;
   1601     }
   1602 
   1603     seqlen = PySequence_Size(seq);
   1604     if (seqlen == 0) {
   1605         Py_DECREF(seq);
   1606         return PyString_FromString("");
   1607     }
   1608     if (seqlen == 1) {
   1609         item = PySequence_Fast_GET_ITEM(seq, 0);
   1610         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
   1611             Py_INCREF(item);
   1612             Py_DECREF(seq);
   1613             return item;
   1614         }
   1615     }
   1616 
   1617     /* There are at least two things to join, or else we have a subclass
   1618      * of the builtin types in the sequence.
   1619      * Do a pre-pass to figure out the total amount of space we'll
   1620      * need (sz), see whether any argument is absurd, and defer to
   1621      * the Unicode join if appropriate.
   1622      */
   1623     for (i = 0; i < seqlen; i++) {
   1624         const size_t old_sz = sz;
   1625         item = PySequence_Fast_GET_ITEM(seq, i);
   1626         if (!PyString_Check(item)){
   1627 #ifdef Py_USING_UNICODE
   1628             if (PyUnicode_Check(item)) {
   1629                 /* Defer to Unicode join.
   1630                  * CAUTION:  There's no gurantee that the
   1631                  * original sequence can be iterated over
   1632                  * again, so we must pass seq here.
   1633                  */
   1634                 PyObject *result;
   1635                 result = PyUnicode_Join((PyObject *)self, seq);
   1636                 Py_DECREF(seq);
   1637                 return result;
   1638             }
   1639 #endif
   1640             PyErr_Format(PyExc_TypeError,
   1641                          "sequence item %zd: expected string,"
   1642                          " %.80s found",
   1643                          i, Py_TYPE(item)->tp_name);
   1644             Py_DECREF(seq);
   1645             return NULL;
   1646         }
   1647         sz += PyString_GET_SIZE(item);
   1648         if (i != 0)
   1649             sz += seplen;
   1650         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
   1651             PyErr_SetString(PyExc_OverflowError,
   1652                 "join() result is too long for a Python string");
   1653             Py_DECREF(seq);
   1654             return NULL;
   1655         }
   1656     }
   1657 
   1658     /* Allocate result space. */
   1659     res = PyString_FromStringAndSize((char*)NULL, sz);
   1660     if (res == NULL) {
   1661         Py_DECREF(seq);
   1662         return NULL;
   1663     }
   1664 
   1665     /* Catenate everything. */
   1666     p = PyString_AS_STRING(res);
   1667     for (i = 0; i < seqlen; ++i) {
   1668         size_t n;
   1669         item = PySequence_Fast_GET_ITEM(seq, i);
   1670         n = PyString_GET_SIZE(item);
   1671         Py_MEMCPY(p, PyString_AS_STRING(item), n);
   1672         p += n;
   1673         if (i < seqlen - 1) {
   1674             Py_MEMCPY(p, sep, seplen);
   1675             p += seplen;
   1676         }
   1677     }
   1678 
   1679     Py_DECREF(seq);
   1680     return res;
   1681 }
   1682 
   1683 PyObject *
   1684 _PyString_Join(PyObject *sep, PyObject *x)
   1685 {
   1686     assert(sep != NULL && PyString_Check(sep));
   1687     assert(x != NULL);
   1688     return string_join((PyStringObject *)sep, x);
   1689 }
   1690 
/* helper macro to fixup start/end slice values
 *
 * `end` is clamped to `len`; a negative `end` or `start` has `len`
 * added to it and is then clamped at 0.  `start` is not clamped
 * against `len`.
 *
 * `start` and `end` are assigned to, so they must be modifiable
 * lvalues, and every argument is evaluated more than once.  The macro
 * expands to two separate `if` statements, not a single statement, so
 * it must not be used as the unbraced body of an if/else or a loop.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
   1705 
   1706 Py_LOCAL_INLINE(Py_ssize_t)
   1707 string_find_internal(PyStringObject *self, PyObject *args, int dir)
   1708 {
   1709     PyObject *subobj;
   1710     const char *sub;
   1711     Py_ssize_t sub_len;
   1712     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
   1713 
   1714     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
   1715                                     args, &subobj, &start, &end))
   1716         return -2;
   1717 
   1718     if (PyString_Check(subobj)) {
   1719         sub = PyString_AS_STRING(subobj);
   1720         sub_len = PyString_GET_SIZE(subobj);
   1721     }
   1722 #ifdef Py_USING_UNICODE
   1723     else if (PyUnicode_Check(subobj))
   1724         return PyUnicode_Find(
   1725             (PyObject *)self, subobj, start, end, dir);
   1726 #endif
   1727     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
   1728         /* XXX - the "expected a character buffer object" is pretty
   1729            confusing for a non-expert.  remap to something else ? */
   1730         return -2;
   1731 
   1732     if (dir > 0)
   1733         return stringlib_find_slice(
   1734             PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1735             sub, sub_len, start, end);
   1736     else
   1737         return stringlib_rfind_slice(
   1738             PyString_AS_STRING(self), PyString_GET_SIZE(self),
   1739             sub, sub_len, start, end);
   1740 }
   1741 
   1742 
   1743 PyDoc_STRVAR(find__doc__,
   1744 "S.find(sub [,start [,end]]) -> int\n\
   1745 \n\
   1746 Return the lowest index in S where substring sub is found,\n\
   1747 such that sub is contained within S[start:end].  Optional\n\
   1748 arguments start and end are interpreted as in slice notation.\n\
   1749 \n\
   1750 Return -1 on failure.");
   1751 
   1752 static PyObject *
   1753 string_find(PyStringObject *self, PyObject *args)
   1754 {
   1755     Py_ssize_t result = string_find_internal(self, args, +1);
   1756     if (result == -2)
   1757         return NULL;
   1758     return PyInt_FromSsize_t(result);
   1759 }
   1760 
   1761 
   1762 PyDoc_STRVAR(index__doc__,
   1763 "S.index(sub [,start [,end]]) -> int\n\
   1764 \n\
   1765 Like S.find() but raise ValueError when the substring is not found.");
   1766 
   1767 static PyObject *
   1768 string_index(PyStringObject *self, PyObject *args)
   1769 {
   1770     Py_ssize_t result = string_find_internal(self, args, +1);
   1771     if (result == -2)
   1772         return NULL;
   1773     if (result == -1) {
   1774         PyErr_SetString(PyExc_ValueError,
   1775                         "substring not found");
   1776         return NULL;
   1777     }
   1778     return PyInt_FromSsize_t(result);
   1779 }
   1780 
   1781 
   1782 PyDoc_STRVAR(rfind__doc__,
   1783 "S.rfind(sub [,start [,end]]) -> int\n\
   1784 \n\
   1785 Return the highest index in S where substring sub is found,\n\
   1786 such that sub is contained within S[start:end].  Optional\n\
   1787 arguments start and end are interpreted as in slice notation.\n\
   1788 \n\
   1789 Return -1 on failure.");
   1790 
   1791 static PyObject *
   1792 string_rfind(PyStringObject *self, PyObject *args)
   1793 {
   1794     Py_ssize_t result = string_find_internal(self, args, -1);
   1795     if (result == -2)
   1796         return NULL;
   1797     return PyInt_FromSsize_t(result);
   1798 }
   1799 
   1800 
   1801 PyDoc_STRVAR(rindex__doc__,
   1802 "S.rindex(sub [,start [,end]]) -> int\n\
   1803 \n\
   1804 Like S.rfind() but raise ValueError when the substring is not found.");
   1805 
   1806 static PyObject *
   1807 string_rindex(PyStringObject *self, PyObject *args)
   1808 {
   1809     Py_ssize_t result = string_find_internal(self, args, -1);
   1810     if (result == -2)
   1811         return NULL;
   1812     if (result == -1) {
   1813         PyErr_SetString(PyExc_ValueError,
   1814                         "substring not found");
   1815         return NULL;
   1816     }
   1817     return PyInt_FromSsize_t(result);
   1818 }
   1819 
   1820 
   1821 Py_LOCAL_INLINE(PyObject *)
   1822 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
   1823 {
   1824     char *s = PyString_AS_STRING(self);
   1825     Py_ssize_t len = PyString_GET_SIZE(self);
   1826     char *sep = PyString_AS_STRING(sepobj);
   1827     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
   1828     Py_ssize_t i, j;
   1829 
   1830     i = 0;
   1831     if (striptype != RIGHTSTRIP) {
   1832         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
   1833             i++;
   1834         }
   1835     }
   1836 
   1837     j = len;
   1838     if (striptype != LEFTSTRIP) {
   1839         do {
   1840             j--;
   1841         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
   1842         j++;
   1843     }
   1844 
   1845     if (i == 0 && j == len && PyString_CheckExact(self)) {
   1846         Py_INCREF(self);
   1847         return (PyObject*)self;
   1848     }
   1849     else
   1850         return PyString_FromStringAndSize(s+i, j-i);
   1851 }
   1852 
   1853 
   1854 Py_LOCAL_INLINE(PyObject *)
   1855 do_strip(PyStringObject *self, int striptype)
   1856 {
   1857     char *s = PyString_AS_STRING(self);
   1858     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
   1859 
   1860     i = 0;
   1861     if (striptype != RIGHTSTRIP) {
   1862         while (i < len && isspace(Py_CHARMASK(s[i]))) {
   1863             i++;
   1864         }
   1865     }
   1866 
   1867     j = len;
   1868     if (striptype != LEFTSTRIP) {
   1869         do {
   1870             j--;
   1871         } while (j >= i && isspace(Py_CHARMASK(s[j])));
   1872         j++;
   1873     }
   1874 
   1875     if (i == 0 && j == len && PyString_CheckExact(self)) {
   1876         Py_INCREF(self);
   1877         return (PyObject*)self;
   1878     }
   1879     else
   1880         return PyString_FromStringAndSize(s+i, j-i);
   1881 }
   1882 
   1883 
   1884 Py_LOCAL_INLINE(PyObject *)
   1885 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
   1886 {
   1887     PyObject *sep = NULL;
   1888 
   1889     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
   1890         return NULL;
   1891 
   1892     if (sep != NULL && sep != Py_None) {
   1893         if (PyString_Check(sep))
   1894             return do_xstrip(self, striptype, sep);
   1895 #ifdef Py_USING_UNICODE
   1896         else if (PyUnicode_Check(sep)) {
   1897             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
   1898             PyObject *res;
   1899             if (uniself==NULL)
   1900                 return NULL;
   1901             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
   1902                 striptype, sep);
   1903             Py_DECREF(uniself);
   1904             return res;
   1905         }
   1906 #endif
   1907         PyErr_Format(PyExc_TypeError,
   1908 #ifdef Py_USING_UNICODE
   1909                      "%s arg must be None, str or unicode",
   1910 #else
   1911                      "%s arg must be None or str",
   1912 #endif
   1913                      STRIPNAME(striptype));
   1914         return NULL;
   1915     }
   1916 
   1917     return do_strip(self, striptype);
   1918 }
   1919 
   1920 
   1921 PyDoc_STRVAR(strip__doc__,
   1922 "S.strip([chars]) -> string or unicode\n\
   1923 \n\
   1924 Return a copy of the string S with leading and trailing\n\
   1925 whitespace removed.\n\
   1926 If chars is given and not None, remove characters in chars instead.\n\
   1927 If chars is unicode, S will be converted to unicode before stripping");
   1928 
   1929 static PyObject *
   1930 string_strip(PyStringObject *self, PyObject *args)
   1931 {
   1932     if (PyTuple_GET_SIZE(args) == 0)
   1933         return do_strip(self, BOTHSTRIP); /* Common case */
   1934     else
   1935         return do_argstrip(self, BOTHSTRIP, args);
   1936 }
   1937 
   1938 
   1939 PyDoc_STRVAR(lstrip__doc__,
   1940 "S.lstrip([chars]) -> string or unicode\n\
   1941 \n\
   1942 Return a copy of the string S with leading whitespace removed.\n\
   1943 If chars is given and not None, remove characters in chars instead.\n\
   1944 If chars is unicode, S will be converted to unicode before stripping");
   1945 
   1946 static PyObject *
   1947 string_lstrip(PyStringObject *self, PyObject *args)
   1948 {
   1949     if (PyTuple_GET_SIZE(args) == 0)
   1950         return do_strip(self, LEFTSTRIP); /* Common case */
   1951     else
   1952         return do_argstrip(self, LEFTSTRIP, args);
   1953 }
   1954 
   1955 
   1956 PyDoc_STRVAR(rstrip__doc__,
   1957 "S.rstrip([chars]) -> string or unicode\n\
   1958 \n\
   1959 Return a copy of the string S with trailing whitespace removed.\n\
   1960 If chars is given and not None, remove characters in chars instead.\n\
   1961 If chars is unicode, S will be converted to unicode before stripping");
   1962 
   1963 static PyObject *
   1964 string_rstrip(PyStringObject *self, PyObject *args)
   1965 {
   1966     if (PyTuple_GET_SIZE(args) == 0)
   1967         return do_strip(self, RIGHTSTRIP); /* Common case */
   1968     else
   1969         return do_argstrip(self, RIGHTSTRIP, args);
   1970 }
   1971 
   1972 
   1973 PyDoc_STRVAR(lower__doc__,
   1974 "S.lower() -> string\n\
   1975 \n\
   1976 Return a copy of the string S converted to lowercase.");
   1977 
   1978 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
   1979 #ifndef _tolower
   1980 #define _tolower tolower
   1981 #endif
   1982 
   1983 static PyObject *
   1984 string_lower(PyStringObject *self)
   1985 {
   1986     char *s;
   1987     Py_ssize_t i, n = PyString_GET_SIZE(self);
   1988     PyObject *newobj;
   1989 
   1990     newobj = PyString_FromStringAndSize(NULL, n);
   1991     if (!newobj)
   1992         return NULL;
   1993 
   1994     s = PyString_AS_STRING(newobj);
   1995 
   1996     Py_MEMCPY(s, PyString_AS_STRING(self), n);
   1997 
   1998     for (i = 0; i < n; i++) {
   1999         int c = Py_CHARMASK(s[i]);
   2000         if (isupper(c))
   2001             s[i] = _tolower(c);
   2002     }
   2003 
   2004     return newobj;
   2005 }
   2006 
   2007 PyDoc_STRVAR(upper__doc__,
   2008 "S.upper() -> string\n\
   2009 \n\
   2010 Return a copy of the string S converted to uppercase.");
   2011 
   2012 #ifndef _toupper
   2013 #define _toupper toupper
   2014 #endif
   2015 
   2016 static PyObject *
   2017 string_upper(PyStringObject *self)
   2018 {
   2019     char *s;
   2020     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2021     PyObject *newobj;
   2022 
   2023     newobj = PyString_FromStringAndSize(NULL, n);
   2024     if (!newobj)
   2025         return NULL;
   2026 
   2027     s = PyString_AS_STRING(newobj);
   2028 
   2029     Py_MEMCPY(s, PyString_AS_STRING(self), n);
   2030 
   2031     for (i = 0; i < n; i++) {
   2032         int c = Py_CHARMASK(s[i]);
   2033         if (islower(c))
   2034             s[i] = _toupper(c);
   2035     }
   2036 
   2037     return newobj;
   2038 }
   2039 
   2040 PyDoc_STRVAR(title__doc__,
   2041 "S.title() -> string\n\
   2042 \n\
   2043 Return a titlecased version of S, i.e. words start with uppercase\n\
   2044 characters, all remaining cased characters have lowercase.");
   2045 
   2046 static PyObject*
   2047 string_title(PyStringObject *self)
   2048 {
   2049     char *s = PyString_AS_STRING(self), *s_new;
   2050     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2051     int previous_is_cased = 0;
   2052     PyObject *newobj;
   2053 
   2054     newobj = PyString_FromStringAndSize(NULL, n);
   2055     if (newobj == NULL)
   2056         return NULL;
   2057     s_new = PyString_AsString(newobj);
   2058     for (i = 0; i < n; i++) {
   2059         int c = Py_CHARMASK(*s++);
   2060         if (islower(c)) {
   2061             if (!previous_is_cased)
   2062                 c = toupper(c);
   2063             previous_is_cased = 1;
   2064         } else if (isupper(c)) {
   2065             if (previous_is_cased)
   2066                 c = tolower(c);
   2067             previous_is_cased = 1;
   2068         } else
   2069             previous_is_cased = 0;
   2070         *s_new++ = c;
   2071     }
   2072     return newobj;
   2073 }
   2074 
   2075 PyDoc_STRVAR(capitalize__doc__,
   2076 "S.capitalize() -> string\n\
   2077 \n\
   2078 Return a copy of the string S with only its first character\n\
   2079 capitalized.");
   2080 
   2081 static PyObject *
   2082 string_capitalize(PyStringObject *self)
   2083 {
   2084     char *s = PyString_AS_STRING(self), *s_new;
   2085     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2086     PyObject *newobj;
   2087 
   2088     newobj = PyString_FromStringAndSize(NULL, n);
   2089     if (newobj == NULL)
   2090         return NULL;
   2091     s_new = PyString_AsString(newobj);
   2092     if (0 < n) {
   2093         int c = Py_CHARMASK(*s++);
   2094         if (islower(c))
   2095             *s_new = toupper(c);
   2096         else
   2097             *s_new = c;
   2098         s_new++;
   2099     }
   2100     for (i = 1; i < n; i++) {
   2101         int c = Py_CHARMASK(*s++);
   2102         if (isupper(c))
   2103             *s_new = tolower(c);
   2104         else
   2105             *s_new = c;
   2106         s_new++;
   2107     }
   2108     return newobj;
   2109 }
   2110 
   2111 
   2112 PyDoc_STRVAR(count__doc__,
   2113 "S.count(sub[, start[, end]]) -> int\n\
   2114 \n\
   2115 Return the number of non-overlapping occurrences of substring sub in\n\
   2116 string S[start:end].  Optional arguments start and end are interpreted\n\
   2117 as in slice notation.");
   2118 
   2119 static PyObject *
   2120 string_count(PyStringObject *self, PyObject *args)
   2121 {
   2122     PyObject *sub_obj;
   2123     const char *str = PyString_AS_STRING(self), *sub;
   2124     Py_ssize_t sub_len;
   2125     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
   2126 
   2127     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
   2128         return NULL;
   2129 
   2130     if (PyString_Check(sub_obj)) {
   2131         sub = PyString_AS_STRING(sub_obj);
   2132         sub_len = PyString_GET_SIZE(sub_obj);
   2133     }
   2134 #ifdef Py_USING_UNICODE
   2135     else if (PyUnicode_Check(sub_obj)) {
   2136         Py_ssize_t count;
   2137         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
   2138         if (count == -1)
   2139             return NULL;
   2140         else
   2141             return PyInt_FromSsize_t(count);
   2142     }
   2143 #endif
   2144     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
   2145         return NULL;
   2146 
   2147     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
   2148 
   2149     return PyInt_FromSsize_t(
   2150         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
   2151         );
   2152 }
   2153 
   2154 PyDoc_STRVAR(swapcase__doc__,
   2155 "S.swapcase() -> string\n\
   2156 \n\
   2157 Return a copy of the string S with uppercase characters\n\
   2158 converted to lowercase and vice versa.");
   2159 
   2160 static PyObject *
   2161 string_swapcase(PyStringObject *self)
   2162 {
   2163     char *s = PyString_AS_STRING(self), *s_new;
   2164     Py_ssize_t i, n = PyString_GET_SIZE(self);
   2165     PyObject *newobj;
   2166 
   2167     newobj = PyString_FromStringAndSize(NULL, n);
   2168     if (newobj == NULL)
   2169         return NULL;
   2170     s_new = PyString_AsString(newobj);
   2171     for (i = 0; i < n; i++) {
   2172         int c = Py_CHARMASK(*s++);
   2173         if (islower(c)) {
   2174             *s_new = toupper(c);
   2175         }
   2176         else if (isupper(c)) {
   2177             *s_new = tolower(c);
   2178         }
   2179         else
   2180             *s_new = c;
   2181         s_new++;
   2182     }
   2183     return newobj;
   2184 }
   2185 
   2186 
   2187 PyDoc_STRVAR(translate__doc__,
   2188 "S.translate(table [,deletechars]) -> string\n\
   2189 \n\
   2190 Return a copy of the string S, where all characters occurring\n\
   2191 in the optional argument deletechars are removed, and the\n\
   2192 remaining characters have been mapped through the given\n\
   2193 translation table, which must be a string of length 256 or None.\n\
   2194 If the table argument is None, no translation is applied and\n\
   2195 the operation simply removes the characters in deletechars.");
   2196 
   2197 static PyObject *
   2198 string_translate(PyStringObject *self, PyObject *args)
   2199 {
   2200     register char *input, *output;
   2201     const char *table;
   2202     register Py_ssize_t i, c, changed = 0;
   2203     PyObject *input_obj = (PyObject*)self;
   2204     const char *output_start, *del_table=NULL;
   2205     Py_ssize_t inlen, tablen, dellen = 0;
   2206     PyObject *result;
   2207     int trans_table[256];
   2208     PyObject *tableobj, *delobj = NULL;
   2209 
   2210     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
   2211                           &tableobj, &delobj))
   2212         return NULL;
   2213 
   2214     if (PyString_Check(tableobj)) {
   2215         table = PyString_AS_STRING(tableobj);
   2216         tablen = PyString_GET_SIZE(tableobj);
   2217     }
   2218     else if (tableobj == Py_None) {
   2219         table = NULL;
   2220         tablen = 256;
   2221     }
   2222 #ifdef Py_USING_UNICODE
   2223     else if (PyUnicode_Check(tableobj)) {
   2224         /* Unicode .translate() does not support the deletechars
   2225            parameter; instead a mapping to None will cause characters
   2226            to be deleted. */
   2227         if (delobj != NULL) {
   2228             PyErr_SetString(PyExc_TypeError,
   2229             "deletions are implemented differently for unicode");
   2230             return NULL;
   2231         }
   2232         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
   2233     }
   2234 #endif
   2235     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
   2236         return NULL;
   2237 
   2238     if (tablen != 256) {
   2239         PyErr_SetString(PyExc_ValueError,
   2240           "translation table must be 256 characters long");
   2241         return NULL;
   2242     }
   2243 
   2244     if (delobj != NULL) {
   2245         if (PyString_Check(delobj)) {
   2246             del_table = PyString_AS_STRING(delobj);
   2247             dellen = PyString_GET_SIZE(delobj);
   2248         }
   2249 #ifdef Py_USING_UNICODE
   2250         else if (PyUnicode_Check(delobj)) {
   2251             PyErr_SetString(PyExc_TypeError,
   2252             "deletions are implemented differently for unicode");
   2253             return NULL;
   2254         }
   2255 #endif
   2256         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
   2257             return NULL;
   2258     }
   2259     else {
   2260         del_table = NULL;
   2261         dellen = 0;
   2262     }
   2263 
   2264     inlen = PyString_GET_SIZE(input_obj);
   2265     result = PyString_FromStringAndSize((char *)NULL, inlen);
   2266     if (result == NULL)
   2267         return NULL;
   2268     output_start = output = PyString_AsString(result);
   2269     input = PyString_AS_STRING(input_obj);
   2270 
   2271     if (dellen == 0 && table != NULL) {
   2272         /* If no deletions are required, use faster code */
   2273         for (i = inlen; --i >= 0; ) {
   2274             c = Py_CHARMASK(*input++);
   2275             if (Py_CHARMASK((*output++ = table[c])) != c)
   2276                 changed = 1;
   2277         }
   2278         if (changed || !PyString_CheckExact(input_obj))
   2279             return result;
   2280         Py_DECREF(result);
   2281         Py_INCREF(input_obj);
   2282         return input_obj;
   2283     }
   2284 
   2285     if (table == NULL) {
   2286         for (i = 0; i < 256; i++)
   2287             trans_table[i] = Py_CHARMASK(i);
   2288     } else {
   2289         for (i = 0; i < 256; i++)
   2290             trans_table[i] = Py_CHARMASK(table[i]);
   2291     }
   2292 
   2293     for (i = 0; i < dellen; i++)
   2294         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
   2295 
   2296     for (i = inlen; --i >= 0; ) {
   2297         c = Py_CHARMASK(*input++);
   2298         if (trans_table[c] != -1)
   2299             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
   2300                 continue;
   2301         changed = 1;
   2302     }
   2303     if (!changed && PyString_CheckExact(input_obj)) {
   2304         Py_DECREF(result);
   2305         Py_INCREF(input_obj);
   2306         return input_obj;
   2307     }
   2308     /* Fix the size of the resulting string */
   2309     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
   2310         return NULL;
   2311     return result;
   2312 }
   2313 
   2314 
   2315 /* find and count characters and substrings */
   2316 
   /* findchar(target, target_len, c): thin wrapper around memchr() that
      yields a `char *` to the first occurrence of byte `c` within the first
      `target_len` bytes of `target`, or NULL when there is none. */
   #define findchar(target, target_len, c)                         \
     ((char *)memchr((const void *)(target), (c), (target_len)))
   2319 
   2320 /* String ops must return a string.  */
   2321 /* If the object is subclass of string, create a copy */
   2322 Py_LOCAL(PyStringObject *)
   2323 return_self(PyStringObject *self)
   2324 {
   2325     if (PyString_CheckExact(self)) {
   2326         Py_INCREF(self);
   2327         return self;
   2328     }
   2329     return (PyStringObject *)PyString_FromStringAndSize(
   2330         PyString_AS_STRING(self),
   2331         PyString_GET_SIZE(self));
   2332 }
   2333 
   2334 Py_LOCAL_INLINE(Py_ssize_t)
   2335 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
   2336 {
   2337     Py_ssize_t count=0;
   2338     const char *start=target;
   2339     const char *end=target+target_len;
   2340 
   2341     while ( (start=findchar(start, end-start, c)) != NULL ) {
   2342         count++;
   2343         if (count >= maxcount)
   2344             break;
   2345         start += 1;
   2346     }
   2347     return count;
   2348 }
   2349 
   2350 
   2351 /* Algorithms for different cases of string replacement */
   2352 
   2353 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
   2354 Py_LOCAL(PyStringObject *)
   2355 replace_interleave(PyStringObject *self,
   2356                    const char *to_s, Py_ssize_t to_len,
   2357                    Py_ssize_t maxcount)
   2358 {
   2359     char *self_s, *result_s;
   2360     Py_ssize_t self_len, result_len;
   2361     Py_ssize_t count, i, product;
   2362     PyStringObject *result;
   2363 
   2364     self_len = PyString_GET_SIZE(self);
   2365 
   2366     /* 1 at the end plus 1 after every character */
   2367     count = self_len+1;
   2368     if (maxcount < count)
   2369         count = maxcount;
   2370 
   2371     /* Check for overflow */
   2372     /*   result_len = count * to_len + self_len; */
   2373     product = count * to_len;
   2374     if (product / to_len != count) {
   2375         PyErr_SetString(PyExc_OverflowError,
   2376                         "replace string is too long");
   2377         return NULL;
   2378     }
   2379     result_len = product + self_len;
   2380     if (result_len < 0) {
   2381         PyErr_SetString(PyExc_OverflowError,
   2382                         "replace string is too long");
   2383         return NULL;
   2384     }
   2385 
   2386     if (! (result = (PyStringObject *)
   2387                      PyString_FromStringAndSize(NULL, result_len)) )
   2388         return NULL;
   2389 
   2390     self_s = PyString_AS_STRING(self);
   2391     result_s = PyString_AS_STRING(result);
   2392 
   2393     /* TODO: special case single character, which doesn't need memcpy */
   2394 
   2395     /* Lay the first one down (guaranteed this will occur) */
   2396     Py_MEMCPY(result_s, to_s, to_len);
   2397     result_s += to_len;
   2398     count -= 1;
   2399 
   2400     for (i=0; i<count; i++) {
   2401         *result_s++ = *self_s++;
   2402         Py_MEMCPY(result_s, to_s, to_len);
   2403         result_s += to_len;
   2404     }
   2405 
   2406     /* Copy the rest of the original string */
   2407     Py_MEMCPY(result_s, self_s, self_len-i);
   2408 
   2409     return result;
   2410 }
   2411 
   2412 /* Special case for deleting a single character */
   2413 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
   2414 Py_LOCAL(PyStringObject *)
   2415 replace_delete_single_character(PyStringObject *self,
   2416                                 char from_c, Py_ssize_t maxcount)
   2417 {
   2418     char *self_s, *result_s;
   2419     char *start, *next, *end;
   2420     Py_ssize_t self_len, result_len;
   2421     Py_ssize_t count;
   2422     PyStringObject *result;
   2423 
   2424     self_len = PyString_GET_SIZE(self);
   2425     self_s = PyString_AS_STRING(self);
   2426 
   2427     count = countchar(self_s, self_len, from_c, maxcount);
   2428     if (count == 0) {
   2429         return return_self(self);
   2430     }
   2431 
   2432     result_len = self_len - count;  /* from_len == 1 */
   2433     assert(result_len>=0);
   2434 
   2435     if ( (result = (PyStringObject *)
   2436                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
   2437         return NULL;
   2438     result_s = PyString_AS_STRING(result);
   2439 
   2440     start = self_s;
   2441     end = self_s + self_len;
   2442     while (count-- > 0) {
   2443         next = findchar(start, end-start, from_c);
   2444         if (next == NULL)
   2445             break;
   2446         Py_MEMCPY(result_s, start, next-start);
   2447         result_s += (next-start);
   2448         start = next+1;
   2449     }
   2450     Py_MEMCPY(result_s, start, end-start);
   2451 
   2452     return result;
   2453 }
   2454 
   2455 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
   2456 
   2457 Py_LOCAL(PyStringObject *)
   2458 replace_delete_substring(PyStringObject *self,
   2459                          const char *from_s, Py_ssize_t from_len,
   2460                          Py_ssize_t maxcount) {
   2461     char *self_s, *result_s;
   2462     char *start, *next, *end;
   2463     Py_ssize_t self_len, result_len;
   2464     Py_ssize_t count, offset;
   2465     PyStringObject *result;
   2466 
   2467     self_len = PyString_GET_SIZE(self);
   2468     self_s = PyString_AS_STRING(self);
   2469 
   2470     count = stringlib_count(self_s, self_len,
   2471                             from_s, from_len,
   2472                             maxcount);
   2473 
   2474     if (count == 0) {
   2475         /* no matches */
   2476         return return_self(self);
   2477     }
   2478 
   2479     result_len = self_len - (count * from_len);
   2480     assert (result_len>=0);
   2481 
   2482     if ( (result = (PyStringObject *)
   2483           PyString_FromStringAndSize(NULL, result_len)) == NULL )
   2484         return NULL;
   2485 
   2486     result_s = PyString_AS_STRING(result);
   2487 
   2488     start = self_s;
   2489     end = self_s + self_len;
   2490     while (count-- > 0) {
   2491         offset = stringlib_find(start, end-start,
   2492                                 from_s, from_len,
   2493                                 0);
   2494         if (offset == -1)
   2495             break;
   2496         next = start + offset;
   2497 
   2498         Py_MEMCPY(result_s, start, next-start);
   2499 
   2500         result_s += (next-start);
   2501         start = next+from_len;
   2502     }
   2503     Py_MEMCPY(result_s, start, end-start);
   2504     return result;
   2505 }
   2506 
   2507 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
   2508 Py_LOCAL(PyStringObject *)
   2509 replace_single_character_in_place(PyStringObject *self,
   2510                                   char from_c, char to_c,
   2511                                   Py_ssize_t maxcount)
   2512 {
   2513     char *self_s, *result_s, *start, *end, *next;
   2514     Py_ssize_t self_len;
   2515     PyStringObject *result;
   2516 
   2517     /* The result string will be the same size */
   2518     self_s = PyString_AS_STRING(self);
   2519     self_len = PyString_GET_SIZE(self);
   2520 
   2521     next = findchar(self_s, self_len, from_c);
   2522 
   2523     if (next == NULL) {
   2524         /* No matches; return the original string */
   2525         return return_self(self);
   2526     }
   2527 
   2528     /* Need to make a new string */
   2529     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
   2530     if (result == NULL)
   2531         return NULL;
   2532     result_s = PyString_AS_STRING(result);
   2533     Py_MEMCPY(result_s, self_s, self_len);
   2534 
   2535     /* change everything in-place, starting with this one */
   2536     start =  result_s + (next-self_s);
   2537     *start = to_c;
   2538     start++;
   2539     end = result_s + self_len;
   2540 
   2541     while (--maxcount > 0) {
   2542         next = findchar(start, end-start, from_c);
   2543         if (next == NULL)
   2544             break;
   2545         *next = to_c;
   2546         start = next+1;
   2547     }
   2548 
   2549     return result;
   2550 }
   2551 
   2552 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
   2553 Py_LOCAL(PyStringObject *)
   2554 replace_substring_in_place(PyStringObject *self,
   2555                            const char *from_s, Py_ssize_t from_len,
   2556                            const char *to_s, Py_ssize_t to_len,
   2557                            Py_ssize_t maxcount)
   2558 {
   2559     char *result_s, *start, *end;
   2560     char *self_s;
   2561     Py_ssize_t self_len, offset;
   2562     PyStringObject *result;
   2563 
   2564     /* The result string will be the same size */
   2565 
   2566     self_s = PyString_AS_STRING(self);
   2567     self_len = PyString_GET_SIZE(self);
   2568 
   2569     offset = stringlib_find(self_s, self_len,
   2570                             from_s, from_len,
   2571                             0);
   2572     if (offset == -1) {
   2573         /* No matches; return the original string */
   2574         return return_self(self);
   2575     }
   2576 
   2577     /* Need to make a new string */
   2578     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
   2579     if (result == NULL)
   2580         return NULL;
   2581     result_s = PyString_AS_STRING(result);
   2582     Py_MEMCPY(result_s, self_s, self_len);
   2583 
   2584     /* change everything in-place, starting with this one */
   2585     start =  result_s + offset;
   2586     Py_MEMCPY(start, to_s, from_len);
   2587     start += from_len;
   2588     end = result_s + self_len;
   2589 
   2590     while ( --maxcount > 0) {
   2591         offset = stringlib_find(start, end-start,
   2592                                 from_s, from_len,
   2593                                 0);
   2594         if (offset==-1)
   2595             break;
   2596         Py_MEMCPY(start+offset, to_s, from_len);
   2597         start += offset+from_len;
   2598     }
   2599 
   2600     return result;
   2601 }
   2602 
   2603 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
   2604 Py_LOCAL(PyStringObject *)
   2605 replace_single_character(PyStringObject *self,
   2606                          char from_c,
   2607                          const char *to_s, Py_ssize_t to_len,
   2608                          Py_ssize_t maxcount)
   2609 {
   2610     char *self_s, *result_s;
   2611     char *start, *next, *end;
   2612     Py_ssize_t self_len, result_len;
   2613     Py_ssize_t count, product;
   2614     PyStringObject *result;
   2615 
   2616     self_s = PyString_AS_STRING(self);
   2617     self_len = PyString_GET_SIZE(self);
   2618 
   2619     count = countchar(self_s, self_len, from_c, maxcount);
   2620     if (count == 0) {
   2621         /* no matches, return unchanged */
   2622         return return_self(self);
   2623     }
   2624 
   2625     /* use the difference between current and new, hence the "-1" */
   2626     /*   result_len = self_len + count * (to_len-1)  */
   2627     product = count * (to_len-1);
   2628     if (product / (to_len-1) != count) {
   2629         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2630         return NULL;
   2631     }
   2632     result_len = self_len + product;
   2633     if (result_len < 0) {
   2634         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2635         return NULL;
   2636     }
   2637 
   2638     if ( (result = (PyStringObject *)
   2639           PyString_FromStringAndSize(NULL, result_len)) == NULL)
   2640         return NULL;
   2641     result_s = PyString_AS_STRING(result);
   2642 
   2643     start = self_s;
   2644     end = self_s + self_len;
   2645     while (count-- > 0) {
   2646         next = findchar(start, end-start, from_c);
   2647         if (next == NULL)
   2648             break;
   2649 
   2650         if (next == start) {
   2651             /* replace with the 'to' */
   2652             Py_MEMCPY(result_s, to_s, to_len);
   2653             result_s += to_len;
   2654             start += 1;
   2655         } else {
   2656             /* copy the unchanged old then the 'to' */
   2657             Py_MEMCPY(result_s, start, next-start);
   2658             result_s += (next-start);
   2659             Py_MEMCPY(result_s, to_s, to_len);
   2660             result_s += to_len;
   2661             start = next+1;
   2662         }
   2663     }
   2664     /* Copy the remainder of the remaining string */
   2665     Py_MEMCPY(result_s, start, end-start);
   2666 
   2667     return result;
   2668 }
   2669 
   2670 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
   2671 Py_LOCAL(PyStringObject *)
   2672 replace_substring(PyStringObject *self,
   2673                   const char *from_s, Py_ssize_t from_len,
   2674                   const char *to_s, Py_ssize_t to_len,
   2675                   Py_ssize_t maxcount) {
   2676     char *self_s, *result_s;
   2677     char *start, *next, *end;
   2678     Py_ssize_t self_len, result_len;
   2679     Py_ssize_t count, offset, product;
   2680     PyStringObject *result;
   2681 
   2682     self_s = PyString_AS_STRING(self);
   2683     self_len = PyString_GET_SIZE(self);
   2684 
   2685     count = stringlib_count(self_s, self_len,
   2686                             from_s, from_len,
   2687                             maxcount);
   2688 
   2689     if (count == 0) {
   2690         /* no matches, return unchanged */
   2691         return return_self(self);
   2692     }
   2693 
   2694     /* Check for overflow */
   2695     /*    result_len = self_len + count * (to_len-from_len) */
   2696     product = count * (to_len-from_len);
   2697     if (product / (to_len-from_len) != count) {
   2698         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2699         return NULL;
   2700     }
   2701     result_len = self_len + product;
   2702     if (result_len < 0) {
   2703         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
   2704         return NULL;
   2705     }
   2706 
   2707     if ( (result = (PyStringObject *)
   2708           PyString_FromStringAndSize(NULL, result_len)) == NULL)
   2709         return NULL;
   2710     result_s = PyString_AS_STRING(result);
   2711 
   2712     start = self_s;
   2713     end = self_s + self_len;
   2714     while (count-- > 0) {
   2715         offset = stringlib_find(start, end-start,
   2716                                 from_s, from_len,
   2717                                 0);
   2718         if (offset == -1)
   2719             break;
   2720         next = start+offset;
   2721         if (next == start) {
   2722             /* replace with the 'to' */
   2723             Py_MEMCPY(result_s, to_s, to_len);
   2724             result_s += to_len;
   2725             start += from_len;
   2726         } else {
   2727             /* copy the unchanged old then the 'to' */
   2728             Py_MEMCPY(result_s, start, next-start);
   2729             result_s += (next-start);
   2730             Py_MEMCPY(result_s, to_s, to_len);
   2731             result_s += to_len;
   2732             start = next+from_len;
   2733         }
   2734     }
   2735     /* Copy the remainder of the remaining string */
   2736     Py_MEMCPY(result_s, start, end-start);
   2737 
   2738     return result;
   2739 }
   2740 
   2741 
   2742 Py_LOCAL(PyStringObject *)
   2743 replace(PyStringObject *self,
   2744     const char *from_s, Py_ssize_t from_len,
   2745     const char *to_s, Py_ssize_t to_len,
   2746     Py_ssize_t maxcount)
   2747 {
   2748     if (maxcount < 0) {
   2749         maxcount = PY_SSIZE_T_MAX;
   2750     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
   2751         /* nothing to do; return the original string */
   2752         return return_self(self);
   2753     }
   2754 
   2755     if (maxcount == 0 ||
   2756         (from_len == 0 && to_len == 0)) {
   2757         /* nothing to do; return the original string */
   2758         return return_self(self);
   2759     }
   2760 
   2761     /* Handle zero-length special cases */
   2762 
   2763     if (from_len == 0) {
   2764         /* insert the 'to' string everywhere.   */
   2765         /*    >>> "Python".replace("", ".")     */
   2766         /*    '.P.y.t.h.o.n.'                   */
   2767         return replace_interleave(self, to_s, to_len, maxcount);
   2768     }
   2769 
   2770     /* Except for "".replace("", "A") == "A" there is no way beyond this */
   2771     /* point for an empty self string to generate a non-empty string */
   2772     /* Special case so the remaining code always gets a non-empty string */
   2773     if (PyString_GET_SIZE(self) == 0) {
   2774         return return_self(self);
   2775     }
   2776 
   2777     if (to_len == 0) {
   2778         /* delete all occurances of 'from' string */
   2779         if (from_len == 1) {
   2780             return replace_delete_single_character(
   2781                 self, from_s[0], maxcount);
   2782         } else {
   2783             return replace_delete_substring(self, from_s, from_len, maxcount);
   2784         }
   2785     }
   2786 
   2787     /* Handle special case where both strings have the same length */
   2788 
   2789     if (from_len == to_len) {
   2790         if (from_len == 1) {
   2791             return replace_single_character_in_place(
   2792                 self,
   2793                 from_s[0],
   2794                 to_s[0],
   2795                 maxcount);
   2796         } else {
   2797             return replace_substring_in_place(
   2798                 self, from_s, from_len, to_s, to_len, maxcount);
   2799         }
   2800     }
   2801 
   2802     /* Otherwise use the more generic algorithms */
   2803     if (from_len == 1) {
   2804         return replace_single_character(self, from_s[0],
   2805                                         to_s, to_len, maxcount);
   2806     } else {
   2807         /* len('from')>=2, len('to')>=1 */
   2808         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
   2809     }
   2810 }
   2811 
   2812 PyDoc_STRVAR(replace__doc__,
   2813 "S.replace(old, new[, count]) -> string\n\
   2814 \n\
   2815 Return a copy of string S with all occurrences of substring\n\
   2816 old replaced by new.  If the optional argument count is\n\
   2817 given, only the first count occurrences are replaced.");
   2818 
   2819 static PyObject *
   2820 string_replace(PyStringObject *self, PyObject *args)
   2821 {
   2822     Py_ssize_t count = -1;
   2823     PyObject *from, *to;
   2824     const char *from_s, *to_s;
   2825     Py_ssize_t from_len, to_len;
   2826 
   2827     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
   2828         return NULL;
   2829 
   2830     if (PyString_Check(from)) {
   2831         from_s = PyString_AS_STRING(from);
   2832         from_len = PyString_GET_SIZE(from);
   2833     }
   2834 #ifdef Py_USING_UNICODE
   2835     if (PyUnicode_Check(from))
   2836         return PyUnicode_Replace((PyObject *)self,
   2837                                  from, to, count);
   2838 #endif
   2839     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
   2840         return NULL;
   2841 
   2842     if (PyString_Check(to)) {
   2843         to_s = PyString_AS_STRING(to);
   2844         to_len = PyString_GET_SIZE(to);
   2845     }
   2846 #ifdef Py_USING_UNICODE
   2847     else if (PyUnicode_Check(to))
   2848         return PyUnicode_Replace((PyObject *)self,
   2849                                  from, to, count);
   2850 #endif
   2851     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
   2852         return NULL;
   2853 
   2854     return (PyObject *)replace((PyStringObject *) self,
   2855                                from_s, from_len,
   2856                                to_s, to_len, count);
   2857 }
   2858 
   2859 /** End DALKE **/
   2860 
   2861 /* Matches the end (direction >= 0) or start (direction < 0) of self
   2862  * against substr, using the start and end arguments. Returns
   2863  * -1 on error, 0 if not found and 1 if found.
   2864  */
   2865 Py_LOCAL(int)
   2866 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
   2867                   Py_ssize_t end, int direction)
   2868 {
   2869     Py_ssize_t len = PyString_GET_SIZE(self);
   2870     Py_ssize_t slen;
   2871     const char* sub;
   2872     const char* str;
   2873 
   2874     if (PyString_Check(substr)) {
   2875         sub = PyString_AS_STRING(substr);
   2876         slen = PyString_GET_SIZE(substr);
   2877     }
   2878 #ifdef Py_USING_UNICODE
   2879     else if (PyUnicode_Check(substr))
   2880         return PyUnicode_Tailmatch((PyObject *)self,
   2881                                    substr, start, end, direction);
   2882 #endif
   2883     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
   2884         return -1;
   2885     str = PyString_AS_STRING(self);
   2886 
   2887     ADJUST_INDICES(start, end, len);
   2888 
   2889     if (direction < 0) {
   2890         /* startswith */
   2891         if (start+slen > len)
   2892             return 0;
   2893     } else {
   2894         /* endswith */
   2895         if (end-start < slen || start > len)
   2896             return 0;
   2897 
   2898         if (end-slen > start)
   2899             start = end - slen;
   2900     }
   2901     if (end-start >= slen)
   2902         return ! memcmp(str+start, sub, slen);
   2903     return 0;
   2904 }
   2905 
   2906 
   2907 PyDoc_STRVAR(startswith__doc__,
   2908 "S.startswith(prefix[, start[, end]]) -> bool\n\
   2909 \n\
   2910 Return True if S starts with the specified prefix, False otherwise.\n\
   2911 With optional start, test S beginning at that position.\n\
   2912 With optional end, stop comparing S at that position.\n\
   2913 prefix can also be a tuple of strings to try.");
   2914 
   2915 static PyObject *
   2916 string_startswith(PyStringObject *self, PyObject *args)
   2917 {
   2918     Py_ssize_t start = 0;
   2919     Py_ssize_t end = PY_SSIZE_T_MAX;
   2920     PyObject *subobj;
   2921     int result;
   2922 
   2923     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
   2924         return NULL;
   2925     if (PyTuple_Check(subobj)) {
   2926         Py_ssize_t i;
   2927         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
   2928             result = _string_tailmatch(self,
   2929                             PyTuple_GET_ITEM(subobj, i),
   2930                             start, end, -1);
   2931             if (result == -1)
   2932                 return NULL;
   2933             else if (result) {
   2934                 Py_RETURN_TRUE;
   2935             }
   2936         }
   2937         Py_RETURN_FALSE;
   2938     }
   2939     result = _string_tailmatch(self, subobj, start, end, -1);
   2940     if (result == -1) {
   2941         if (PyErr_ExceptionMatches(PyExc_TypeError))
   2942             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
   2943                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
   2944         return NULL;
   2945     }
   2946     else
   2947         return PyBool_FromLong(result);
   2948 }
   2949 
   2950 
   2951 PyDoc_STRVAR(endswith__doc__,
   2952 "S.endswith(suffix[, start[, end]]) -> bool\n\
   2953 \n\
   2954 Return True if S ends with the specified suffix, False otherwise.\n\
   2955 With optional start, test S beginning at that position.\n\
   2956 With optional end, stop comparing S at that position.\n\
   2957 suffix can also be a tuple of strings to try.");
   2958 
   2959 static PyObject *
   2960 string_endswith(PyStringObject *self, PyObject *args)
   2961 {
   2962     Py_ssize_t start = 0;
   2963     Py_ssize_t end = PY_SSIZE_T_MAX;
   2964     PyObject *subobj;
   2965     int result;
   2966 
   2967     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
   2968         return NULL;
   2969     if (PyTuple_Check(subobj)) {
   2970         Py_ssize_t i;
   2971         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
   2972             result = _string_tailmatch(self,
   2973                             PyTuple_GET_ITEM(subobj, i),
   2974                             start, end, +1);
   2975             if (result == -1)
   2976                 return NULL;
   2977             else if (result) {
   2978                 Py_RETURN_TRUE;
   2979             }
   2980         }
   2981         Py_RETURN_FALSE;
   2982     }
   2983     result = _string_tailmatch(self, subobj, start, end, +1);
   2984     if (result == -1) {
   2985         if (PyErr_ExceptionMatches(PyExc_TypeError))
   2986             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
   2987                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
   2988         return NULL;
   2989     }
   2990     else
   2991         return PyBool_FromLong(result);
   2992 }
   2993 
   2994 
   2995 PyDoc_STRVAR(encode__doc__,
   2996 "S.encode([encoding[,errors]]) -> object\n\
   2997 \n\
   2998 Encodes S using the codec registered for encoding. encoding defaults\n\
   2999 to the default encoding. errors may be given to set a different error\n\
   3000 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
   3001 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
   3002 'xmlcharrefreplace' as well as any other name registered with\n\
   3003 codecs.register_error that is able to handle UnicodeEncodeErrors.");
   3004 
   3005 static PyObject *
   3006 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
   3007 {
   3008     static char *kwlist[] = {"encoding", "errors", 0};
   3009     char *encoding = NULL;
   3010     char *errors = NULL;
   3011     PyObject *v;
   3012 
   3013     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
   3014                                      kwlist, &encoding, &errors))
   3015         return NULL;
   3016     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
   3017     if (v == NULL)
   3018         goto onError;
   3019     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
   3020         PyErr_Format(PyExc_TypeError,
   3021                      "encoder did not return a string/unicode object "
   3022                      "(type=%.400s)",
   3023                      Py_TYPE(v)->tp_name);
   3024         Py_DECREF(v);
   3025         return NULL;
   3026     }
   3027     return v;
   3028 
   3029  onError:
   3030     return NULL;
   3031 }
   3032 
   3033 
   3034 PyDoc_STRVAR(decode__doc__,
   3035 "S.decode([encoding[,errors]]) -> object\n\
   3036 \n\
   3037 Decodes S using the codec registered for encoding. encoding defaults\n\
   3038 to the default encoding. errors may be given to set a different error\n\
   3039 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
   3040 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
   3041 as well as any other name registered with codecs.register_error that is\n\
   3042 able to handle UnicodeDecodeErrors.");
   3043 
   3044 static PyObject *
   3045 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
   3046 {
   3047     static char *kwlist[] = {"encoding", "errors", 0};
   3048     char *encoding = NULL;
   3049     char *errors = NULL;
   3050     PyObject *v;
   3051 
   3052     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
   3053                                      kwlist, &encoding, &errors))
   3054         return NULL;
   3055     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
   3056     if (v == NULL)
   3057         goto onError;
   3058     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
   3059         PyErr_Format(PyExc_TypeError,
   3060                      "decoder did not return a string/unicode object "
   3061                      "(type=%.400s)",
   3062                      Py_TYPE(v)->tp_name);
   3063         Py_DECREF(v);
   3064         return NULL;
   3065     }
   3066     return v;
   3067 
   3068  onError:
   3069     return NULL;
   3070 }
   3071 
   3072 
   3073 PyDoc_STRVAR(expandtabs__doc__,
   3074 "S.expandtabs([tabsize]) -> string\n\
   3075 \n\
   3076 Return a copy of S where all tab characters are expanded using spaces.\n\
   3077 If tabsize is not given, a tab size of 8 characters is assumed.");
   3078 
   3079 static PyObject*
   3080 string_expandtabs(PyStringObject *self, PyObject *args)
   3081 {
   3082     const char *e, *p, *qe;
   3083     char *q;
   3084     Py_ssize_t i, j, incr;
   3085     PyObject *u;
   3086     int tabsize = 8;
   3087 
   3088     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
   3089         return NULL;
   3090 
   3091     /* First pass: determine size of output string */
   3092     i = 0; /* chars up to and including most recent \n or \r */
   3093     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
   3094     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
   3095     for (p = PyString_AS_STRING(self); p < e; p++) {
   3096         if (*p == '\t') {
   3097             if (tabsize > 0) {
   3098                 incr = tabsize - (j % tabsize);
   3099                 if (j > PY_SSIZE_T_MAX - incr)
   3100                     goto overflow1;
   3101                 j += incr;
   3102             }
   3103         }
   3104         else {
   3105             if (j > PY_SSIZE_T_MAX - 1)
   3106                 goto overflow1;
   3107             j++;
   3108             if (*p == '\n' || *p == '\r') {
   3109                 if (i > PY_SSIZE_T_MAX - j)
   3110                     goto overflow1;
   3111                 i += j;
   3112                 j = 0;
   3113             }
   3114         }
   3115     }
   3116 
   3117     if (i > PY_SSIZE_T_MAX - j)
   3118         goto overflow1;
   3119 
   3120     /* Second pass: create output string and fill it */
   3121     u = PyString_FromStringAndSize(NULL, i + j);
   3122     if (!u)
   3123         return NULL;
   3124 
   3125     j = 0; /* same as in first pass */
   3126     q = PyString_AS_STRING(u); /* next output char */
   3127     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
   3128 
   3129     for (p = PyString_AS_STRING(self); p < e; p++) {
   3130         if (*p == '\t') {
   3131             if (tabsize > 0) {
   3132                 i = tabsize - (j % tabsize);
   3133                 j += i;
   3134                 while (i--) {
   3135                     if (q >= qe)
   3136                         goto overflow2;
   3137                     *q++ = ' ';
   3138                 }
   3139             }
   3140         }
   3141         else {
   3142             if (q >= qe)
   3143                 goto overflow2;
   3144             *q++ = *p;
   3145             j++;
   3146             if (*p == '\n' || *p == '\r')
   3147                 j = 0;
   3148         }
   3149     }
   3150 
   3151     return u;
   3152 
   3153   overflow2:
   3154     Py_DECREF(u);
   3155   overflow1:
   3156     PyErr_SetString(PyExc_OverflowError, "new string is too long");
   3157     return NULL;
   3158 }
   3159 
   3160 Py_LOCAL_INLINE(PyObject *)
   3161 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
   3162 {
   3163     PyObject *u;
   3164 
   3165     if (left < 0)
   3166         left = 0;
   3167     if (right < 0)
   3168         right = 0;
   3169 
   3170     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
   3171         Py_INCREF(self);
   3172         return (PyObject *)self;
   3173     }
   3174 
   3175     u = PyString_FromStringAndSize(NULL,
   3176                                    left + PyString_GET_SIZE(self) + right);
   3177     if (u) {
   3178         if (left)
   3179             memset(PyString_AS_STRING(u), fill, left);
   3180         Py_MEMCPY(PyString_AS_STRING(u) + left,
   3181                PyString_AS_STRING(self),
   3182                PyString_GET_SIZE(self));
   3183         if (right)
   3184             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
   3185                fill, right);
   3186     }
   3187 
   3188     return u;
   3189 }
   3190 
   3191 PyDoc_STRVAR(ljust__doc__,
   3192 "S.ljust(width[, fillchar]) -> string\n"
   3193 "\n"
   3194 "Return S left-justified in a string of length width. Padding is\n"
   3195 "done using the specified fill character (default is a space).");
   3196 
   3197 static PyObject *
   3198 string_ljust(PyStringObject *self, PyObject *args)
   3199 {
   3200     Py_ssize_t width;
   3201     char fillchar = ' ';
   3202 
   3203     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
   3204         return NULL;
   3205 
   3206     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
   3207         Py_INCREF(self);
   3208         return (PyObject*) self;
   3209     }
   3210 
   3211     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
   3212 }
   3213 
   3214 
   3215 PyDoc_STRVAR(rjust__doc__,
   3216 "S.rjust(width[, fillchar]) -> string\n"
   3217 "\n"
   3218 "Return S right-justified in a string of length width. Padding is\n"
   3219 "done using the specified fill character (default is a space)");
   3220 
   3221 static PyObject *
   3222 string_rjust(PyStringObject *self, PyObject *args)
   3223 {
   3224     Py_ssize_t width;
   3225     char fillchar = ' ';
   3226 
   3227     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
   3228         return NULL;
   3229 
   3230     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
   3231         Py_INCREF(self);
   3232         return (PyObject*) self;
   3233     }
   3234 
   3235     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
   3236 }
   3237 
   3238 
   3239 PyDoc_STRVAR(center__doc__,
   3240 "S.center(width[, fillchar]) -> string\n"
   3241 "\n"
   3242 "Return S centered in a string of length width. Padding is\n"
   3243 "done using the specified fill character (default is a space)");
   3244 
   3245 static PyObject *
   3246 string_center(PyStringObject *self, PyObject *args)
   3247 {
   3248     Py_ssize_t marg, left;
   3249     Py_ssize_t width;
   3250     char fillchar = ' ';
   3251 
   3252     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
   3253         return NULL;
   3254 
   3255     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
   3256         Py_INCREF(self);
   3257         return (PyObject*) self;
   3258     }
   3259 
   3260     marg = width - PyString_GET_SIZE(self);
   3261     left = marg / 2 + (marg & width & 1);
   3262 
   3263     return pad(self, left, marg - left, fillchar);
   3264 }
   3265 
   3266 PyDoc_STRVAR(zfill__doc__,
   3267 "S.zfill(width) -> string\n"
   3268 "\n"
   3269 "Pad a numeric string S with zeros on the left, to fill a field\n"
   3270 "of the specified width.  The string S is never truncated.");
   3271 
   3272 static PyObject *
   3273 string_zfill(PyStringObject *self, PyObject *args)
   3274 {
   3275     Py_ssize_t fill;
   3276     PyObject *s;
   3277     char *p;
   3278     Py_ssize_t width;
   3279 
   3280     if (!PyArg_ParseTuple(args, "n:zfill", &width))
   3281         return NULL;
   3282 
   3283     if (PyString_GET_SIZE(self) >= width) {
   3284         if (PyString_CheckExact(self)) {
   3285             Py_INCREF(self);
   3286             return (PyObject*) self;
   3287         }
   3288         else
   3289             return PyString_FromStringAndSize(
   3290             PyString_AS_STRING(self),
   3291             PyString_GET_SIZE(self)
   3292             );
   3293     }
   3294 
   3295     fill = width - PyString_GET_SIZE(self);
   3296 
   3297     s = pad(self, fill, 0, '0');
   3298 
   3299     if (s == NULL)
   3300         return NULL;
   3301 
   3302     p = PyString_AS_STRING(s);
   3303     if (p[fill] == '+' || p[fill] == '-') {
   3304         /* move sign to beginning of string */
   3305         p[0] = p[fill];
   3306         p[fill] = '0';
   3307     }
   3308 
   3309     return (PyObject*) s;
   3310 }
   3311 
   3312 PyDoc_STRVAR(isspace__doc__,
   3313 "S.isspace() -> bool\n\
   3314 \n\
   3315 Return True if all characters in S are whitespace\n\
   3316 and there is at least one character in S, False otherwise.");
   3317 
   3318 static PyObject*
   3319 string_isspace(PyStringObject *self)
   3320 {
   3321     register const unsigned char *p
   3322         = (unsigned char *) PyString_AS_STRING(self);
   3323     register const unsigned char *e;
   3324 
   3325     /* Shortcut for single character strings */
   3326     if (PyString_GET_SIZE(self) == 1 &&
   3327         isspace(*p))
   3328         return PyBool_FromLong(1);
   3329 
   3330     /* Special case for empty strings */
   3331     if (PyString_GET_SIZE(self) == 0)
   3332         return PyBool_FromLong(0);
   3333 
   3334     e = p + PyString_GET_SIZE(self);
   3335     for (; p < e; p++) {
   3336         if (!isspace(*p))
   3337             return PyBool_FromLong(0);
   3338     }
   3339     return PyBool_FromLong(1);
   3340 }
   3341 
   3342 
   3343 PyDoc_STRVAR(isalpha__doc__,
   3344 "S.isalpha() -> bool\n\
   3345 \n\
   3346 Return True if all characters in S are alphabetic\n\
   3347 and there is at least one character in S, False otherwise.");
   3348 
   3349 static PyObject*
   3350 string_isalpha(PyStringObject *self)
   3351 {
   3352     register const unsigned char *p
   3353         = (unsigned char *) PyString_AS_STRING(self);
   3354     register const unsigned char *e;
   3355 
   3356     /* Shortcut for single character strings */
   3357     if (PyString_GET_SIZE(self) == 1 &&
   3358         isalpha(*p))
   3359         return PyBool_FromLong(1);
   3360 
   3361     /* Special case for empty strings */
   3362     if (PyString_GET_SIZE(self) == 0)
   3363         return PyBool_FromLong(0);
   3364 
   3365     e = p + PyString_GET_SIZE(self);
   3366     for (; p < e; p++) {
   3367         if (!isalpha(*p))
   3368             return PyBool_FromLong(0);
   3369     }
   3370     return PyBool_FromLong(1);
   3371 }
   3372 
   3373 
   3374 PyDoc_STRVAR(isalnum__doc__,
   3375 "S.isalnum() -> bool\n\
   3376 \n\
   3377 Return True if all characters in S are alphanumeric\n\
   3378 and there is at least one character in S, False otherwise.");
   3379 
   3380 static PyObject*
   3381 string_isalnum(PyStringObject *self)
   3382 {
   3383     register const unsigned char *p
   3384         = (unsigned char *) PyString_AS_STRING(self);
   3385     register const unsigned char *e;
   3386 
   3387     /* Shortcut for single character strings */
   3388     if (PyString_GET_SIZE(self) == 1 &&
   3389         isalnum(*p))
   3390         return PyBool_FromLong(1);
   3391 
   3392     /* Special case for empty strings */
   3393     if (PyString_GET_SIZE(self) == 0)
   3394         return PyBool_FromLong(0);
   3395 
   3396     e = p + PyString_GET_SIZE(self);
   3397     for (; p < e; p++) {
   3398         if (!isalnum(*p))
   3399             return PyBool_FromLong(0);
   3400     }
   3401     return PyBool_FromLong(1);
   3402 }
   3403 
   3404 
   3405 PyDoc_STRVAR(isdigit__doc__,
   3406 "S.isdigit() -> bool\n\
   3407 \n\
   3408 Return True if all characters in S are digits\n\
   3409 and there is at least one character in S, False otherwise.");
   3410 
   3411 static PyObject*
   3412 string_isdigit(PyStringObject *self)
   3413 {
   3414     register const unsigned char *p
   3415         = (unsigned char *) PyString_AS_STRING(self);
   3416     register const unsigned char *e;
   3417 
   3418     /* Shortcut for single character strings */
   3419     if (PyString_GET_SIZE(self) == 1 &&
   3420         isdigit(*p))
   3421         return PyBool_FromLong(1);
   3422 
   3423     /* Special case for empty strings */
   3424     if (PyString_GET_SIZE(self) == 0)
   3425         return PyBool_FromLong(0);
   3426 
   3427     e = p + PyString_GET_SIZE(self);
   3428     for (; p < e; p++) {
   3429         if (!isdigit(*p))
   3430             return PyBool_FromLong(0);
   3431     }
   3432     return PyBool_FromLong(1);
   3433 }
   3434 
   3435 
   3436 PyDoc_STRVAR(islower__doc__,
   3437 "S.islower() -> bool\n\
   3438 \n\
   3439 Return True if all cased characters in S are lowercase and there is\n\
   3440 at least one cased character in S, False otherwise.");
   3441 
   3442 static PyObject*
   3443 string_islower(PyStringObject *self)
   3444 {
   3445     register const unsigned char *p
   3446         = (unsigned char *) PyString_AS_STRING(self);
   3447     register const unsigned char *e;
   3448     int cased;
   3449 
   3450     /* Shortcut for single character strings */
   3451     if (PyString_GET_SIZE(self) == 1)
   3452         return PyBool_FromLong(islower(*p) != 0);
   3453 
   3454     /* Special case for empty strings */
   3455     if (PyString_GET_SIZE(self) == 0)
   3456         return PyBool_FromLong(0);
   3457 
   3458     e = p + PyString_GET_SIZE(self);
   3459     cased = 0;
   3460     for (; p < e; p++) {
   3461         if (isupper(*p))
   3462             return PyBool_FromLong(0);
   3463         else if (!cased && islower(*p))
   3464             cased = 1;
   3465     }
   3466     return PyBool_FromLong(cased);
   3467 }
   3468 
   3469 
   3470 PyDoc_STRVAR(isupper__doc__,
   3471 "S.isupper() -> bool\n\
   3472 \n\
   3473 Return True if all cased characters in S are uppercase and there is\n\
   3474 at least one cased character in S, False otherwise.");
   3475 
   3476 static PyObject*
   3477 string_isupper(PyStringObject *self)
   3478 {
   3479     register const unsigned char *p
   3480         = (unsigned char *) PyString_AS_STRING(self);
   3481     register const unsigned char *e;
   3482     int cased;
   3483 
   3484     /* Shortcut for single character strings */
   3485     if (PyString_GET_SIZE(self) == 1)
   3486         return PyBool_FromLong(isupper(*p) != 0);
   3487 
   3488     /* Special case for empty strings */
   3489     if (PyString_GET_SIZE(self) == 0)
   3490         return PyBool_FromLong(0);
   3491 
   3492     e = p + PyString_GET_SIZE(self);
   3493     cased = 0;
   3494     for (; p < e; p++) {
   3495         if (islower(*p))
   3496             return PyBool_FromLong(0);
   3497         else if (!cased && isupper(*p))
   3498             cased = 1;
   3499     }
   3500     return PyBool_FromLong(cased);
   3501 }
   3502 
   3503 
   3504 PyDoc_STRVAR(istitle__doc__,
   3505 "S.istitle() -> bool\n\
   3506 \n\
   3507 Return True if S is a titlecased string and there is at least one\n\
   3508 character in S, i.e. uppercase characters may only follow uncased\n\
   3509 characters and lowercase characters only cased ones. Return False\n\
   3510 otherwise.");
   3511 
   3512 static PyObject*
   3513 string_istitle(PyStringObject *self, PyObject *uncased)
   3514 {
   3515     register const unsigned char *p
   3516         = (unsigned char *) PyString_AS_STRING(self);
   3517     register const unsigned char *e;
   3518     int cased, previous_is_cased;
   3519 
   3520     /* Shortcut for single character strings */
   3521     if (PyString_GET_SIZE(self) == 1)
   3522         return PyBool_FromLong(isupper(*p) != 0);
   3523 
   3524     /* Special case for empty strings */
   3525     if (PyString_GET_SIZE(self) == 0)
   3526         return PyBool_FromLong(0);
   3527 
   3528     e = p + PyString_GET_SIZE(self);
   3529     cased = 0;
   3530     previous_is_cased = 0;
   3531     for (; p < e; p++) {
   3532         register const unsigned char ch = *p;
   3533 
   3534         if (isupper(ch)) {
   3535             if (previous_is_cased)
   3536                 return PyBool_FromLong(0);
   3537             previous_is_cased = 1;
   3538             cased = 1;
   3539         }
   3540         else if (islower(ch)) {
   3541             if (!previous_is_cased)
   3542                 return PyBool_FromLong(0);
   3543             previous_is_cased = 1;
   3544             cased = 1;
   3545         }
   3546         else
   3547             previous_is_cased = 0;
   3548     }
   3549     return PyBool_FromLong(cased);
   3550 }
   3551 
   3552 
   3553 PyDoc_STRVAR(splitlines__doc__,
   3554 "S.splitlines(keepends=False) -> list of strings\n\
   3555 \n\
   3556 Return a list of the lines in S, breaking at line boundaries.\n\
   3557 Line breaks are not included in the resulting list unless keepends\n\
   3558 is given and true.");
   3559 
   3560 static PyObject*
   3561 string_splitlines(PyStringObject *self, PyObject *args)
   3562 {
   3563     int keepends = 0;
   3564 
   3565     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
   3566         return NULL;
   3567 
   3568     return stringlib_splitlines(
   3569         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
   3570         keepends
   3571     );
   3572 }
   3573 
   3574 PyDoc_STRVAR(sizeof__doc__,
   3575 "S.__sizeof__() -> size of S in memory, in bytes");
   3576 
   3577 static PyObject *
   3578 string_sizeof(PyStringObject *v)
   3579 {
   3580     Py_ssize_t res;
   3581     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
   3582     return PyInt_FromSsize_t(res);
   3583 }
   3584 
   3585 static PyObject *
   3586 string_getnewargs(PyStringObject *v)
   3587 {
   3588     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
   3589 }
   3590 
   3591 
   3592 #include "stringlib/string_format.h"
   3593 
/* Docstring for the "format" entry of string_methods (the method itself,
   do_string_format, is not defined in this part of the file). */
PyDoc_STRVAR(format__doc__,
"S.format(*args, **kwargs) -> string\n\
\n\
Return a formatted version of S, using substitutions from args and kwargs.\n\
The substitutions are identified by braces ('{' and '}').");
   3599 
   3600 static PyObject *
   3601 string__format__(PyObject* self, PyObject* args)
   3602 {
   3603     PyObject *format_spec;
   3604     PyObject *result = NULL;
   3605     PyObject *tmp = NULL;
   3606 
   3607     /* If 2.x, convert format_spec to the same type as value */
   3608     /* This is to allow things like u''.format('') */
   3609     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
   3610         goto done;
   3611     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
   3612         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
   3613                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
   3614         goto done;
   3615     }
   3616     tmp = PyObject_Str(format_spec);
   3617     if (tmp == NULL)
   3618         goto done;
   3619     format_spec = tmp;
   3620 
   3621     result = _PyBytes_FormatAdvanced(self,
   3622                                      PyString_AS_STRING(format_spec),
   3623                                      PyString_GET_SIZE(format_spec));
   3624 done:
   3625     Py_XDECREF(tmp);
   3626     return result;
   3627 }
   3628 
   3629 PyDoc_STRVAR(p_format__doc__,
   3630 "S.__format__(format_spec) -> string\n\
   3631 \n\
   3632 Return a formatted version of S as described by format_spec.");
   3633 
   3634 
/* Method table for the str type; it is installed as tp_methods of
   PyString_Type.  Each entry gives the Python-visible name, the C
   implementation, the calling-convention flags and (optionally) the
   docstring.  Entries that omit the docstring leave that field
   zero-initialized.  The table ends with a NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    /* The two underscore-prefixed formatter entries carry no docstring. */
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL,     NULL}                         /* sentinel */
};
   3693 
   3694 static PyObject *
   3695 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
   3696 
   3697 static PyObject *
   3698 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   3699 {
   3700     PyObject *x = NULL;
   3701     static char *kwlist[] = {"object", 0};
   3702 
   3703     if (type != &PyString_Type)
   3704         return str_subtype_new(type, args, kwds);
   3705     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
   3706         return NULL;
   3707     if (x == NULL)
   3708         return PyString_FromString("");
   3709     return PyObject_Str(x);
   3710 }
   3711 
   3712 static PyObject *
   3713 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   3714 {
   3715     PyObject *tmp, *pnew;
   3716     Py_ssize_t n;
   3717 
   3718     assert(PyType_IsSubtype(type, &PyString_Type));
   3719     tmp = string_new(&PyString_Type, args, kwds);
   3720     if (tmp == NULL)
   3721         return NULL;
   3722     assert(PyString_CheckExact(tmp));
   3723     n = PyString_GET_SIZE(tmp);
   3724     pnew = type->tp_alloc(type, n);
   3725     if (pnew != NULL) {
   3726         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
   3727         ((PyStringObject *)pnew)->ob_shash =
   3728             ((PyStringObject *)tmp)->ob_shash;
   3729         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
   3730     }
   3731     Py_DECREF(tmp);
   3732     return pnew;
   3733 }
   3734 
/* tp_new for basestring: always fails.  Sets a TypeError and returns
   NULL regardless of the arguments, none of which are examined. */
static PyObject *
basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyErr_SetString(PyExc_TypeError,
                    "The basestring type cannot be instantiated");
    return NULL;
}
   3742 
   3743 static PyObject *
   3744 string_mod(PyObject *v, PyObject *w)
   3745 {
   3746     if (!PyString_Check(v)) {
   3747         Py_INCREF(Py_NotImplemented);
   3748         return Py_NotImplemented;
   3749     }
   3750     return PyString_Format(v, w);
   3751 }
   3752 
/* Docstring used as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
   3755 
/* Number protocol for str, referenced by PyString_Type.tp_as_number.
   Only the remainder slot is filled in (string_mod, i.e. the % operator);
   the slots not listed after it are left zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
   3763 
   3764 
/* Type object for basestring.  It derives from PyBaseObject_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE), defines no methods or slots of its
   own, and its tp_new (basestring_new) always raises TypeError. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
   3806 
/* Docstring used as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object='') -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It derives from PyBaseString_Type and wires up
   the number, sequence, mapping and buffer protocols, hashing, rich
   comparison, the string_methods table and string_new as constructor.
   Instances are variable-sized: PyStringObject_SIZE bytes of fixed part
   plus sizeof(char) per item. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
   3856 
   3857 void
   3858 PyString_Concat(register PyObject **pv, register PyObject *w)
   3859 {
   3860     register PyObject *v;
   3861     if (*pv == NULL)
   3862         return;
   3863     if (w == NULL || !PyString_Check(*pv)) {
   3864         Py_CLEAR(*pv);
   3865         return;
   3866     }
   3867     v = string_concat((PyStringObject *) *pv, w);
   3868     Py_DECREF(*pv);
   3869     *pv = v;
   3870 }
   3871 
/* Same as PyString_Concat(pv, w), but additionally drops one reference
   to w afterwards.  w may be NULL (Py_XDECREF tolerates that). */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyString_Concat(pv, w);
    Py_XDECREF(w);
}
   3878 
   3879 
   3880 /* The following function breaks the notion that strings are immutable:
   3881    it changes the size of a string.  We get away with this only if there
   3882    is only one module referencing the object.  You can also think of it
   3883    as creating a new string object and destroying the old one, only
   3884    more efficiently.  In any case, don't use this if the string may
   3885    already be known to some other part of the code...
   3886    Note that if there's not enough memory to resize the string, the original
   3887    string object at *pv is deallocated, *pv is set to NULL, an "out of
   3888    memory" exception is set, and -1 is returned.  Else (on success) 0 is
   3889    returned, and the value in *pv may or may not be the same as on input.
   3890    As always, an extra byte is allocated for a trailing \0 byte (newsize
   3891    does *not* include that), and a trailing \0 byte is stored.
   3892 */
   3893 
   3894 int
   3895 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
   3896 {
   3897     register PyObject *v;
   3898     register PyStringObject *sv;
   3899     v = *pv;
   3900     if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
   3901         PyString_CHECK_INTERNED(v)) {
   3902         *pv = 0;
   3903         Py_DECREF(v);
   3904         PyErr_BadInternalCall();
   3905         return -1;
   3906     }
   3907     /* XXX UNREF/NEWREF interface should be more symmetrical */
   3908     _Py_DEC_REFTOTAL;
   3909     _Py_ForgetReference(v);
   3910     *pv = (PyObject *)
   3911         PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
   3912     if (*pv == NULL) {
   3913         PyObject_Del(v);
   3914         PyErr_NoMemory();
   3915         return -1;
   3916     }
   3917     _Py_NewReference(*pv);
   3918     sv = (PyStringObject *) *pv;
   3919     Py_SIZE(sv) = newsize;
   3920     sv->ob_sval[newsize] = '\0';
   3921     sv->ob_shash = -1;          /* invalidate cached hash value */
   3922     return 0;
   3923 }
   3924 
   3925 /* Helpers for formatstring */
   3926 
   3927 Py_LOCAL_INLINE(PyObject *)
   3928 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
   3929 {
   3930     Py_ssize_t argidx = *p_argidx;
   3931     if (argidx < arglen) {
   3932         (*p_argidx)++;
   3933         if (arglen < 0)
   3934             return args;
   3935         else
   3936             return PyTuple_GetItem(args, argidx);
   3937     }
   3938     PyErr_SetString(PyExc_TypeError,
   3939                     "not enough arguments for format string");
   3940     return NULL;
   3941 }
   3942 
/* Format codes
 *
 * Bit flags stored in the `flags' argument of the format helpers below.
 * Each macro is listed with the format character it stands for:
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
   3955 
   3956 /* Returns a new reference to a PyString object, or NULL on failure. */
   3957 
   3958 static PyObject *
   3959 formatfloat(PyObject *v, int flags, int prec, int type)
   3960 {
   3961     char *p;
   3962     PyObject *result;
   3963     double x;
   3964 
   3965     x = PyFloat_AsDouble(v);
   3966     if (x == -1.0 && PyErr_Occurred()) {
   3967         PyErr_Format(PyExc_TypeError, "float argument required, "
   3968                      "not %.200s", Py_TYPE(v)->tp_name);
   3969         return NULL;
   3970     }
   3971 
   3972     if (prec < 0)
   3973         prec = 6;
   3974 
   3975     p = PyOS_double_to_string(x, type, prec,
   3976                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
   3977 
   3978     if (p == NULL)
   3979         return NULL;
   3980     result = PyString_FromStringAndSize(p, strlen(p));
   3981     PyMem_Free(p);
   3982     return result;
   3983 }
   3984 
   3985 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
   3986  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
   3987  * Python's regular ints.
   3988  * Return value:  a new PyString*, or NULL if error.
   3989  *  .  *pbuf is set to point into it,
   3990  *     *plen set to the # of chars following that.
   3991  *     Caller must decref it when done using pbuf.
   3992  *     The string starting at *pbuf is of the form
   3993  *         "-"? ("0x" | "0X")? digit+
   3994  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
   3995  *         set in flags.  The case of hex digits will be correct,
   3996  *     There will be at least prec digits, zero-filled on the left if
   3997  *         necessary to get that many.
   3998  * val          object to be converted
   3999  * flags        bitmask of format flags; only F_ALT is looked at
   4000  * prec         minimum number of digits; 0-fill on left if needed
   4001  * type         a character in [duoxX]; u acts the same as d
   4002  *
   4003  * CAUTION:  o, x and X conversions on regular ints can never
   4004  * produce a '-' sign, but can for Python's unbounded ints.
   4005  */
   4006 PyObject*
   4007 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
   4008                      char **pbuf, int *plen)
   4009 {
   4010     PyObject *result = NULL;
   4011     char *buf;
   4012     Py_ssize_t i;
   4013     int sign;           /* 1 if '-', else 0 */
   4014     int len;            /* number of characters */
   4015     Py_ssize_t llen;
   4016     int numdigits;      /* len == numnondigits + numdigits */
   4017     int numnondigits = 0;
   4018 
   4019     switch (type) {
   4020     case 'd':
   4021     case 'u':
   4022         result = Py_TYPE(val)->tp_str(val);
   4023         break;
   4024     case 'o':
   4025         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
   4026         break;
   4027     case 'x':
   4028     case 'X':
   4029         numnondigits = 2;
   4030         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
   4031         break;
   4032     default:
   4033         assert(!"'type' not in [duoxX]");
   4034     }
   4035     if (!result)
   4036         return NULL;
   4037 
   4038     buf = PyString_AsString(result);
   4039     if (!buf) {
   4040         Py_DECREF(result);
   4041         return NULL;
   4042     }
   4043 
   4044     /* To modify the string in-place, there can only be one reference. */
   4045     if (Py_REFCNT(result) != 1) {
   4046         PyErr_BadInternalCall();
   4047         return NULL;
   4048     }
   4049     llen = PyString_Size(result);
   4050     if (llen > INT_MAX) {
   4051         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
   4052         return NULL;
   4053     }
   4054     len = (int)llen;
   4055     if (buf[len-1] == 'L') {
   4056         --len;
   4057         buf[len] = '\0';
   4058     }
   4059     sign = buf[0] == '-';
   4060     numnondigits += sign;
   4061     numdigits = len - numnondigits;
   4062     assert(numdigits > 0);
   4063 
   4064     /* Get rid of base marker unless F_ALT */
   4065     if ((flags & F_ALT) == 0) {
   4066         /* Need to skip 0x, 0X or 0. */
   4067         int skipped = 0;
   4068         switch (type) {
   4069         case 'o':
   4070             assert(buf[sign] == '0');
   4071             /* If 0 is only digit, leave it alone. */
   4072             if (numdigits > 1) {
   4073                 skipped = 1;
   4074                 --numdigits;
   4075             }
   4076             break;
   4077         case 'x':
   4078         case 'X':
   4079             assert(buf[sign] == '0');
   4080             assert(buf[sign + 1] == 'x');
   4081             skipped = 2;
   4082             numnondigits -= 2;
   4083             break;
   4084         }
   4085         if (skipped) {
   4086             buf += skipped;
   4087             len -= skipped;
   4088             if (sign)
   4089                 buf[0] = '-';
   4090         }
   4091         assert(len == numnondigits + numdigits);
   4092         assert(numdigits > 0);
   4093     }
   4094 
   4095     /* Fill with leading zeroes to meet minimum width. */
   4096     if (prec > numdigits) {
   4097         PyObject *r1 = PyString_FromStringAndSize(NULL,
   4098                                 numnondigits + prec);
   4099         char *b1;
   4100         if (!r1) {
   4101             Py_DECREF(result);
   4102             return NULL;
   4103         }
   4104         b1 = PyString_AS_STRING(r1);
   4105         for (i = 0; i < numnondigits; ++i)
   4106             *b1++ = *buf++;
   4107         for (i = 0; i < prec - numdigits; i++)
   4108             *b1++ = '0';
   4109         for (i = 0; i < numdigits; i++)
   4110             *b1++ = *buf++;
   4111         *b1 = '\0';
   4112         Py_DECREF(result);
   4113         result = r1;
   4114         buf = PyString_AS_STRING(result);
   4115         len = numnondigits + prec;
   4116     }
   4117 
   4118     /* Fix up case for hex conversions. */
   4119     if (type == 'X') {
   4120         /* Need to convert all lower case letters to upper case.
   4121            and need to convert 0x to 0X (and -0x to -0X). */
   4122         for (i = 0; i < len; i++)
   4123             if (buf[i] >= 'a' && buf[i] <= 'x')
   4124                 buf[i] -= 'a'-'A';
   4125     }
   4126     *pbuf = buf;
   4127     *plen = len;
   4128     return result;
   4129 }
   4130 
   4131 Py_LOCAL_INLINE(int)
   4132 formatint(char *buf, size_t buflen, int flags,
   4133           int prec, int type, PyObject *v)
   4134 {
   4135     /* fmt = '%#.' + `prec` + 'l' + `type`
   4136        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
   4137        + 1 + 1 = 24 */
   4138     char fmt[64];       /* plenty big enough! */
   4139     char *sign;
   4140     long x;
   4141 
   4142     x = PyInt_AsLong(v);
   4143     if (x == -1 && PyErr_Occurred()) {
   4144         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
   4145                      Py_TYPE(v)->tp_name);
   4146         return -1;
   4147     }
   4148     if (x < 0 && type == 'u') {
   4149         type = 'd';
   4150     }
   4151     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
   4152         sign = "-";
   4153     else
   4154         sign = "";
   4155     if (prec < 0)
   4156         prec = 1;
   4157 
   4158     if ((flags & F_ALT) &&
   4159         (type == 'x' || type == 'X')) {
   4160         /* When converting under %#x or %#X, there are a number
   4161          * of issues that cause pain:
   4162          * - when 0 is being converted, the C standard leaves off
   4163          *   the '0x' or '0X', which is inconsistent with other
   4164          *   %#x/%#X conversions and inconsistent with Python's
   4165          *   hex() function
   4166          * - there are platforms that violate the standard and
   4167          *   convert 0 with the '0x' or '0X'
   4168          *   (Metrowerks, Compaq Tru64)
   4169          * - there are platforms that give '0x' when converting
   4170          *   under %#X, but convert 0 in accordance with the
   4171          *   standard (OS/2 EMX)
   4172          *
   4173          * We can achieve the desired consistency by inserting our
   4174          * own '0x' or '0X' prefix, and substituting %x/%X in place
   4175          * of %#x/%#X.
   4176          *
   4177          * Note that this is the same approach as used in
   4178          * formatint() in unicodeobject.c
   4179          */
   4180         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
   4181                       sign, type, prec, type);
   4182     }
   4183     else {
   4184         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
   4185                       sign, (flags&F_ALT) ? "#" : "",
   4186                       prec, type);
   4187     }
   4188 
   4189     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
   4190      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
   4191      */
   4192     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
   4193         PyErr_SetString(PyExc_OverflowError,
   4194             "formatted integer is too long (precision too large?)");
   4195         return -1;
   4196     }
   4197     if (sign[0])
   4198         PyOS_snprintf(buf, buflen, fmt, -x);
   4199     else
   4200         PyOS_snprintf(buf, buflen, fmt, x);
   4201     return (int)strlen(buf);
   4202 }
   4203 
   4204 Py_LOCAL_INLINE(int)
   4205 formatchar(char *buf, size_t buflen, PyObject *v)
   4206 {
   4207     /* presume that the buffer is at least 2 characters long */
   4208     if (PyString_Check(v)) {
   4209         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
   4210             return -1;
   4211     }
   4212     else {
   4213         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
   4214             return -1;
   4215     }
   4216     buf[1] = '\0';
   4217     return 1;
   4218 }
   4219 
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length, in bytes, of the scratch buffer in which
   the ints & chars are formatted (the buffer handed to formatint() and
   formatchar()). XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
   4229 
   4230 PyObject *
   4231 PyString_Format(PyObject *format, PyObject *args)
   4232 {
   4233     char *fmt, *res;
   4234     Py_ssize_t arglen, argidx;
   4235     Py_ssize_t reslen, rescnt, fmtcnt;
   4236     int args_owned = 0;
   4237     PyObject *result, *orig_args;
   4238 #ifdef Py_USING_UNICODE
   4239     PyObject *v, *w;
   4240 #endif
   4241     PyObject *dict = NULL;
   4242     if (format == NULL || !PyString_Check(format) || args == NULL) {
   4243         PyErr_BadInternalCall();
   4244         return NULL;
   4245     }
   4246     orig_args = args;
   4247     fmt = PyString_AS_STRING(format);
   4248     fmtcnt = PyString_GET_SIZE(format);
   4249     reslen = rescnt = fmtcnt + 100;
   4250     result = PyString_FromStringAndSize((char *)NULL, reslen);
   4251     if (result == NULL)
   4252         return NULL;
   4253     res = PyString_AsString(result);
   4254     if (PyTuple_Check(args)) {
   4255         arglen = PyTuple_GET_SIZE(args);
   4256         argidx = 0;
   4257     }
   4258     else {
   4259         arglen = -1;
   4260         argidx = -2;
   4261     }
   4262     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
   4263         !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
   4264         dict = args;
   4265     while (--fmtcnt >= 0) {
   4266         if (*fmt != '%') {
   4267             if (--rescnt < 0) {
   4268                 rescnt = fmtcnt + 100;
   4269                 reslen += rescnt;
   4270                 if (_PyString_Resize(&result, reslen))
   4271                     return NULL;
   4272                 res = PyString_AS_STRING(result)
   4273                     + reslen - rescnt;
   4274                 --rescnt;
   4275             }
   4276             *res++ = *fmt++;
   4277         }
   4278         else {
   4279             /* Got a format specifier */
   4280             int flags = 0;
   4281             Py_ssize_t width = -1;
   4282             int prec = -1;
   4283             int c = '\0';
   4284             int fill;
   4285             int isnumok;
   4286             PyObject *v = NULL;
   4287             PyObject *temp = NULL;
   4288             char *pbuf;
   4289             int sign;
   4290             Py_ssize_t len;
   4291             char formatbuf[FORMATBUFLEN];
   4292                  /* For format{int,char}() */
   4293 #ifdef Py_USING_UNICODE
   4294             char *fmt_start = fmt;
   4295             Py_ssize_t argidx_start = argidx;
   4296 #endif
   4297 
   4298             fmt++;
   4299             if (*fmt == '(') {
   4300                 char *keystart;
   4301                 Py_ssize_t keylen;
   4302                 PyObject *key;
   4303                 int pcount = 1;
   4304 
   4305                 if (dict == NULL) {
   4306                     PyErr_SetString(PyExc_TypeError,
   4307                              "format requires a mapping");
   4308                     goto error;
   4309                 }
   4310                 ++fmt;
   4311                 --fmtcnt;
   4312                 keystart = fmt;
   4313                 /* Skip over balanced parentheses */
   4314                 while (pcount > 0 && --fmtcnt >= 0) {
   4315                     if (*fmt == ')')
   4316                         --pcount;
   4317                     else if (*fmt == '(')
   4318                         ++pcount;
   4319                     fmt++;
   4320                 }
   4321                 keylen = fmt - keystart - 1;
   4322                 if (fmtcnt < 0 || pcount > 0) {
   4323                     PyErr_SetString(PyExc_ValueError,
   4324                                "incomplete format key");
   4325                     goto error;
   4326                 }
   4327                 key = PyString_FromStringAndSize(keystart,
   4328                                                  keylen);
   4329                 if (key == NULL)
   4330                     goto error;
   4331                 if (args_owned) {
   4332                     Py_DECREF(args);
   4333                     args_owned = 0;
   4334                 }
   4335                 args = PyObject_GetItem(dict, key);
   4336                 Py_DECREF(key);
   4337                 if (args == NULL) {
   4338                     goto error;
   4339                 }
   4340                 args_owned = 1;
   4341                 arglen = -1;
   4342                 argidx = -2;
   4343             }
   4344             while (--fmtcnt >= 0) {
   4345                 switch (c = *fmt++) {
   4346                 case '-': flags |= F_LJUST; continue;
   4347                 case '+': flags |= F_SIGN; continue;
   4348                 case ' ': flags |= F_BLANK; continue;
   4349                 case '#': flags |= F_ALT; continue;
   4350                 case '0': flags |= F_ZERO; continue;
   4351                 }
   4352                 break;
   4353             }
   4354             if (c == '*') {
   4355                 v = getnextarg(args, arglen, &argidx);
   4356                 if (v == NULL)
   4357                     goto error;
   4358                 if (!PyInt_Check(v)) {
   4359                     PyErr_SetString(PyExc_TypeError,
   4360                                     "* wants int");
   4361                     goto error;
   4362                 }
   4363                 width = PyInt_AsSsize_t(v);
   4364                 if (width == -1 && PyErr_Occurred())
   4365                     goto error;
   4366                 if (width < 0) {
   4367                     flags |= F_LJUST;
   4368                     width = -width;
   4369                 }
   4370                 if (--fmtcnt >= 0)
   4371                     c = *fmt++;
   4372             }
   4373             else if (c >= 0 && isdigit(c)) {
   4374                 width = c - '0';
   4375                 while (--fmtcnt >= 0) {
   4376                     c = Py_CHARMASK(*fmt++);
   4377                     if (!isdigit(c))
   4378                         break;
   4379                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
   4380                         PyErr_SetString(
   4381                             PyExc_ValueError,
   4382                             "width too big");
   4383                         goto error;
   4384                     }
   4385                     width = width*10 + (c - '0');
   4386                 }
   4387             }
   4388             if (c == '.') {
   4389                 prec = 0;
   4390                 if (--fmtcnt >= 0)
   4391                     c = *fmt++;
   4392                 if (c == '*') {
   4393                     v = getnextarg(args, arglen, &argidx);
   4394                     if (v == NULL)
   4395                         goto error;
   4396                     if (!PyInt_Check(v)) {
   4397                         PyErr_SetString(
   4398                             PyExc_TypeError,
   4399                             "* wants int");
   4400                         goto error;
   4401                     }
   4402                     prec = _PyInt_AsInt(v);
   4403                     if (prec == -1 && PyErr_Occurred())
   4404                         goto error;
   4405                     if (prec < 0)
   4406                         prec = 0;
   4407                     if (--fmtcnt >= 0)
   4408                         c = *fmt++;
   4409                 }
   4410                 else if (c >= 0 && isdigit(c)) {
   4411                     prec = c - '0';
   4412                     while (--fmtcnt >= 0) {
   4413                         c = Py_CHARMASK(*fmt++);
   4414                         if (!isdigit(c))
   4415                             break;
   4416                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
   4417                             PyErr_SetString(
   4418                                 PyExc_ValueError,
   4419                                 "prec too big");
   4420                             goto error;
   4421                         }
   4422                         prec = prec*10 + (c - '0');
   4423                     }
   4424                 }
   4425             } /* prec */
   4426             if (fmtcnt >= 0) {
   4427                 if (c == 'h' || c == 'l' || c == 'L') {
   4428                     if (--fmtcnt >= 0)
   4429                         c = *fmt++;
   4430                 }
   4431             }
   4432             if (fmtcnt < 0) {
   4433                 PyErr_SetString(PyExc_ValueError,
   4434                                 "incomplete format");
   4435                 goto error;
   4436             }
   4437             if (c != '%') {
   4438                 v = getnextarg(args, arglen, &argidx);
   4439                 if (v == NULL)
   4440                     goto error;
   4441             }
   4442             sign = 0;
   4443             fill = ' ';
   4444             switch (c) {
   4445             case '%':
   4446                 pbuf = "%";
   4447                 len = 1;
   4448                 break;
   4449             case 's':
   4450 #ifdef Py_USING_UNICODE
   4451                 if (PyUnicode_Check(v)) {
   4452                     fmt = fmt_start;
   4453                     argidx = argidx_start;
   4454                     goto unicode;
   4455                 }
   4456 #endif
   4457                 temp = _PyObject_Str(v);
   4458 #ifdef Py_USING_UNICODE
   4459                 if (temp != NULL && PyUnicode_Check(temp)) {
   4460                     Py_DECREF(temp);
   4461                     fmt = fmt_start;
   4462                     argidx = argidx_start;
   4463                     goto unicode;
   4464                 }
   4465 #endif
   4466                 /* Fall through */
   4467             case 'r':
   4468                 if (c == 'r')
   4469                     temp = PyObject_Repr(v);
   4470                 if (temp == NULL)
   4471                     goto error;
   4472                 if (!PyString_Check(temp)) {
   4473                     PyErr_SetString(PyExc_TypeError,
   4474                       "%s argument has non-string str()");
   4475                     Py_DECREF(temp);
   4476                     goto error;
   4477                 }
   4478                 pbuf = PyString_AS_STRING(temp);
   4479                 len = PyString_GET_SIZE(temp);
   4480                 if (prec >= 0 && len > prec)
   4481                     len = prec;
   4482                 break;
   4483             case 'i':
   4484             case 'd':
   4485             case 'u':
   4486             case 'o':
   4487             case 'x':
   4488             case 'X':
   4489                 if (c == 'i')
   4490                     c = 'd';
   4491                 isnumok = 0;
   4492                 if (PyNumber_Check(v)) {
   4493                     PyObject *iobj=NULL;
   4494 
   4495                     if (PyInt_Check(v) || (PyLong_Check(v))) {
   4496                         iobj = v;
   4497                         Py_INCREF(iobj);
   4498                     }
   4499                     else {
   4500                         iobj = PyNumber_Int(v);
   4501                         if (iobj==NULL) {
   4502                             PyErr_Clear();
   4503                             iobj = PyNumber_Long(v);
   4504                         }
   4505                     }
   4506                     if (iobj!=NULL) {
   4507                         if (PyInt_Check(iobj)) {
   4508                             isnumok = 1;
   4509                             pbuf = formatbuf;
   4510                             len = formatint(pbuf,
   4511                                             sizeof(formatbuf),
   4512                                             flags, prec, c, iobj);
   4513                             Py_DECREF(iobj);
   4514                             if (len < 0)
   4515                                 goto error;
   4516                             sign = 1;
   4517                         }
   4518                         else if (PyLong_Check(iobj)) {
   4519                             int ilen;
   4520 
   4521                             isnumok = 1;
   4522                             temp = _PyString_FormatLong(iobj, flags,
   4523                                 prec, c, &pbuf, &ilen);
   4524                             Py_DECREF(iobj);
   4525                             len = ilen;
   4526                             if (!temp)
   4527                                 goto error;
   4528                             sign = 1;
   4529                         }
   4530                         else {
   4531                             Py_DECREF(iobj);
   4532                         }
   4533                     }
   4534                 }
   4535                 if (!isnumok) {
   4536                     PyErr_Format(PyExc_TypeError,
   4537                         "%%%c format: a number is required, "
   4538                         "not %.200s", c, Py_TYPE(v)->tp_name);
   4539                     goto error;
   4540                 }
   4541                 if (flags & F_ZERO)
   4542                     fill = '0';
   4543                 break;
   4544             case 'e':
   4545             case 'E':
   4546             case 'f':
   4547             case 'F':
   4548             case 'g':
   4549             case 'G':
   4550                 temp = formatfloat(v, flags, prec, c);
   4551                 if (temp == NULL)
   4552                     goto error;
   4553                 pbuf = PyString_AS_STRING(temp);
   4554                 len = PyString_GET_SIZE(temp);
   4555                 sign = 1;
   4556                 if (flags & F_ZERO)
   4557                     fill = '0';
   4558                 break;
   4559             case 'c':
   4560 #ifdef Py_USING_UNICODE
   4561                 if (PyUnicode_Check(v)) {
   4562                     fmt = fmt_start;
   4563                     argidx = argidx_start;
   4564                     goto unicode;
   4565                 }
   4566 #endif
   4567                 pbuf = formatbuf;
   4568                 len = formatchar(pbuf, sizeof(formatbuf), v);
   4569                 if (len < 0)
   4570                     goto error;
   4571                 break;
   4572             default:
   4573                 PyErr_Format(PyExc_ValueError,
   4574                   "unsupported format character '%c' (0x%x) "
   4575                   "at index %zd",
   4576                   c, c,
   4577                   (Py_ssize_t)(fmt - 1 -
   4578                                PyString_AsString(format)));
   4579                 goto error;
   4580             }
   4581             if (sign) {
   4582                 if (*pbuf == '-' || *pbuf == '+') {
   4583                     sign = *pbuf++;
   4584                     len--;
   4585                 }
   4586                 else if (flags & F_SIGN)
   4587                     sign = '+';
   4588                 else if (flags & F_BLANK)
   4589                     sign = ' ';
   4590                 else
   4591                     sign = 0;
   4592             }
   4593             if (width < len)
   4594                 width = len;
   4595             if (rescnt - (sign != 0) < width) {
   4596                 reslen -= rescnt;
   4597                 rescnt = width + fmtcnt + 100;
   4598                 reslen += rescnt;
   4599                 if (reslen < 0) {
   4600                     Py_DECREF(result);
   4601                     Py_XDECREF(temp);
   4602                     return PyErr_NoMemory();
   4603                 }
   4604                 if (_PyString_Resize(&result, reslen)) {
   4605                     Py_XDECREF(temp);
   4606                     return NULL;
   4607                 }
   4608                 res = PyString_AS_STRING(result)
   4609                     + reslen - rescnt;
   4610             }
   4611             if (sign) {
   4612                 if (fill != ' ')
   4613                     *res++ = sign;
   4614                 rescnt--;
   4615                 if (width > len)
   4616                     width--;
   4617             }
   4618             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
   4619                 assert(pbuf[0] == '0');
   4620                 assert(pbuf[1] == c);
   4621                 if (fill != ' ') {
   4622                     *res++ = *pbuf++;
   4623                     *res++ = *pbuf++;
   4624                 }
   4625                 rescnt -= 2;
   4626                 width -= 2;
   4627                 if (width < 0)
   4628                     width = 0;
   4629                 len -= 2;
   4630             }
   4631             if (width > len && !(flags & F_LJUST)) {
   4632                 do {
   4633                     --rescnt;
   4634                     *res++ = fill;
   4635                 } while (--width > len);
   4636             }
   4637             if (fill == ' ') {
   4638                 if (sign)
   4639                     *res++ = sign;
   4640                 if ((flags & F_ALT) &&
   4641                     (c == 'x' || c == 'X')) {
   4642                     assert(pbuf[0] == '0');
   4643                     assert(pbuf[1] == c);
   4644                     *res++ = *pbuf++;
   4645                     *res++ = *pbuf++;
   4646                 }
   4647             }
   4648             Py_MEMCPY(res, pbuf, len);
   4649             res += len;
   4650             rescnt -= len;
   4651             while (--width >= len) {
   4652                 --rescnt;
   4653                 *res++ = ' ';
   4654             }
   4655             if (dict && (argidx < arglen) && c != '%') {
   4656                 PyErr_SetString(PyExc_TypeError,
   4657                            "not all arguments converted during string formatting");
   4658                 Py_XDECREF(temp);
   4659                 goto error;
   4660             }
   4661             Py_XDECREF(temp);
   4662         } /* '%' */
   4663     } /* until end */
   4664     if (argidx < arglen && !dict) {
   4665         PyErr_SetString(PyExc_TypeError,
   4666                         "not all arguments converted during string formatting");
   4667         goto error;
   4668     }
   4669     if (args_owned) {
   4670         Py_DECREF(args);
   4671     }
   4672     if (_PyString_Resize(&result, reslen - rescnt))
   4673         return NULL;
   4674     return result;
   4675 
   4676 #ifdef Py_USING_UNICODE
   4677  unicode:
   4678     if (args_owned) {
   4679         Py_DECREF(args);
   4680         args_owned = 0;
   4681     }
   4682     /* Fiddle args right (remove the first argidx arguments) */
   4683     if (PyTuple_Check(orig_args) && argidx > 0) {
   4684         PyObject *v;
   4685         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
   4686         v = PyTuple_New(n);
   4687         if (v == NULL)
   4688             goto error;
   4689         while (--n >= 0) {
   4690             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
   4691             Py_INCREF(w);
   4692             PyTuple_SET_ITEM(v, n, w);
   4693         }
   4694         args = v;
   4695     } else {
   4696         Py_INCREF(orig_args);
   4697         args = orig_args;
   4698     }
   4699     args_owned = 1;
   4700     /* Take what we have of the result and let the Unicode formatting
   4701        function format the rest of the input. */
   4702     rescnt = res - PyString_AS_STRING(result);
   4703     if (_PyString_Resize(&result, rescnt))
   4704         goto error;
   4705     fmtcnt = PyString_GET_SIZE(format) - \
   4706              (fmt - PyString_AS_STRING(format));
   4707     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
   4708     if (format == NULL)
   4709         goto error;
   4710     v = PyUnicode_Format(format, args);
   4711     Py_DECREF(format);
   4712     if (v == NULL)
   4713         goto error;
   4714     /* Paste what we have (result) to what the Unicode formatting
   4715        function returned (v) and return the result (or error) */
   4716     w = PyUnicode_Concat(result, v);
   4717     Py_DECREF(result);
   4718     Py_DECREF(v);
   4719     Py_DECREF(args);
   4720     return w;
   4721 #endif /* Py_USING_UNICODE */
   4722 
   4723  error:
   4724     Py_DECREF(result);
   4725     if (args_owned) {
   4726         Py_DECREF(args);
   4727     }
   4728     return NULL;
   4729 }
   4730 
   4731 void
   4732 PyString_InternInPlace(PyObject **p)
   4733 {
   4734     register PyStringObject *s = (PyStringObject *)(*p);
   4735     PyObject *t;
   4736     if (s == NULL || !PyString_Check(s))
   4737         Py_FatalError("PyString_InternInPlace: strings only please!");
   4738     /* If it's a string subclass, we don't really know what putting
   4739        it in the interned dict might do. */
   4740     if (!PyString_CheckExact(s))
   4741         return;
   4742     if (PyString_CHECK_INTERNED(s))
   4743         return;
   4744     if (interned == NULL) {
   4745         interned = PyDict_New();
   4746         if (interned == NULL) {
   4747             PyErr_Clear(); /* Don't leave an exception */
   4748             return;
   4749         }
   4750     }
   4751     t = PyDict_GetItem(interned, (PyObject *)s);
   4752     if (t) {
   4753         Py_INCREF(t);
   4754         Py_DECREF(*p);
   4755         *p = t;
   4756         return;
   4757     }
   4758 
   4759     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
   4760         PyErr_Clear();
   4761         return;
   4762     }
   4763     /* The two references in interned are not counted by refcnt.
   4764        The string deallocator will take care of this */
   4765     Py_REFCNT(s) -= 2;
   4766     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
   4767 }
   4768 
   4769 void
   4770 PyString_InternImmortal(PyObject **p)
   4771 {
   4772     PyString_InternInPlace(p);
   4773     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
   4774         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
   4775         Py_INCREF(*p);
   4776     }
   4777 }
   4778 
   4779 
   4780 PyObject *
   4781 PyString_InternFromString(const char *cp)
   4782 {
   4783     PyObject *s = PyString_FromString(cp);
   4784     if (s == NULL)
   4785         return NULL;
   4786     PyString_InternInPlace(&s);
   4787     return s;
   4788 }
   4789 
   4790 void
   4791 PyString_Fini(void)
   4792 {
   4793     int i;
   4794     for (i = 0; i < UCHAR_MAX + 1; i++)
   4795         Py_CLEAR(characters[i]);
   4796     Py_CLEAR(nullstring);
   4797 }
   4798 
   4799 void _Py_ReleaseInternedStrings(void)
   4800 {
   4801     PyObject *keys;
   4802     PyStringObject *s;
   4803     Py_ssize_t i, n;
   4804     Py_ssize_t immortal_size = 0, mortal_size = 0;
   4805 
   4806     if (interned == NULL || !PyDict_Check(interned))
   4807         return;
   4808     keys = PyDict_Keys(interned);
   4809     if (keys == NULL || !PyList_Check(keys)) {
   4810         PyErr_Clear();
   4811         return;
   4812     }
   4813 
   4814     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
   4815        detector, interned strings are not forcibly deallocated; rather, we
   4816        give them their stolen references back, and then clear and DECREF
   4817        the interned dict. */
   4818 
   4819     n = PyList_GET_SIZE(keys);
   4820     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
   4821         n);
   4822     for (i = 0; i < n; i++) {
   4823         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
   4824         switch (s->ob_sstate) {
   4825         case SSTATE_NOT_INTERNED:
   4826             /* XXX Shouldn't happen */
   4827             break;
   4828         case SSTATE_INTERNED_IMMORTAL:
   4829             Py_REFCNT(s) += 1;
   4830             immortal_size += Py_SIZE(s);
   4831             break;
   4832         case SSTATE_INTERNED_MORTAL:
   4833             Py_REFCNT(s) += 2;
   4834             mortal_size += Py_SIZE(s);
   4835             break;
   4836         default:
   4837             Py_FatalError("Inconsistent interned string state.");
   4838         }
   4839         s->ob_sstate = SSTATE_NOT_INTERNED;
   4840     }
   4841     fprintf(stderr, "total size of all interned strings: "
   4842                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
   4843                     "mortal/immortal\n", mortal_size, immortal_size);
   4844     Py_DECREF(keys);
   4845     PyDict_Clear(interned);
   4846     Py_CLEAR(interned);
   4847 }
   4848