Home | History | Annotate | Download | only in Modules
      1 /* strop module */
      2 
      3 #define PY_SSIZE_T_CLEAN
      4 #include "Python.h"
      5 #include <ctype.h>
      6 
      7 PyDoc_STRVAR(strop_module__doc__,
      8 "Common string manipulations, optimized for speed.\n"
      9 "\n"
     10 "Always use \"import string\" rather than referencing\n"
     11 "this module directly.");
     12 
     13 /* XXX This file assumes that the <ctype.h> is*() functions
     14    XXX are defined for all 8-bit characters! */
     15 
     16 #define WARN if (PyErr_Warn(PyExc_DeprecationWarning, \
     17                "strop functions are obsolete; use string methods")) \
     18          return NULL
     19 
     20 /* The lstrip(), rstrip() and strip() functions are implemented
     21    in do_strip(), which uses an additional parameter to indicate what
     22    type of strip should occur. */
     23 
     24 #define LEFTSTRIP 0
     25 #define RIGHTSTRIP 1
     26 #define BOTHSTRIP 2
     27 
     28 
     29 static PyObject *
     30 split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
     31 {
     32     Py_ssize_t i = 0, j;
     33     int err;
     34     Py_ssize_t countsplit = 0;
     35     PyObject* item;
     36     PyObject *list = PyList_New(0);
     37 
     38     if (list == NULL)
     39         return NULL;
     40 
     41     while (i < len) {
     42         while (i < len && isspace(Py_CHARMASK(s[i]))) {
     43             i = i+1;
     44         }
     45         j = i;
     46         while (i < len && !isspace(Py_CHARMASK(s[i]))) {
     47             i = i+1;
     48         }
     49         if (j < i) {
     50             item = PyString_FromStringAndSize(s+j, i-j);
     51             if (item == NULL)
     52                 goto finally;
     53 
     54             err = PyList_Append(list, item);
     55             Py_DECREF(item);
     56             if (err < 0)
     57                 goto finally;
     58 
     59             countsplit++;
     60             while (i < len && isspace(Py_CHARMASK(s[i]))) {
     61                 i = i+1;
     62             }
     63             if (maxsplit && (countsplit >= maxsplit) && i < len) {
     64                 item = PyString_FromStringAndSize(
     65                     s+i, len - i);
     66                 if (item == NULL)
     67                     goto finally;
     68 
     69                 err = PyList_Append(list, item);
     70                 Py_DECREF(item);
     71                 if (err < 0)
     72                     goto finally;
     73 
     74                 i = len;
     75             }
     76         }
     77     }
     78     return list;
     79   finally:
     80     Py_DECREF(list);
     81     return NULL;
     82 }
     83 
     84 
     85 PyDoc_STRVAR(splitfields__doc__,
     86 "split(s [,sep [,maxsplit]]) -> list of strings\n"
     87 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
     88 "\n"
     89 "Return a list of the words in the string s, using sep as the\n"
     90 "delimiter string.  If maxsplit is nonzero, splits into at most\n"
     91 "maxsplit words.  If sep is not specified, any whitespace string\n"
     92 "is a separator.  Maxsplit defaults to 0.\n"
     93 "\n"
     94 "(split and splitfields are synonymous)");
     95 
     96 static PyObject *
     97 strop_splitfields(PyObject *self, PyObject *args)
     98 {
     99     Py_ssize_t len, n, i, j, err;
    100     Py_ssize_t splitcount, maxsplit;
    101     char *s, *sub;
    102     PyObject *list, *item;
    103 
    104     WARN;
    105     sub = NULL;
    106     n = 0;
    107     splitcount = 0;
    108     maxsplit = 0;
    109     if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
    110         return NULL;
    111     if (sub == NULL)
    112         return split_whitespace(s, len, maxsplit);
    113     if (n == 0) {
    114         PyErr_SetString(PyExc_ValueError, "empty separator");
    115         return NULL;
    116     }
    117 
    118     list = PyList_New(0);
    119     if (list == NULL)
    120         return NULL;
    121 
    122     i = j = 0;
    123     while (i+n <= len) {
    124         if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
    125             item = PyString_FromStringAndSize(s+j, i-j);
    126             if (item == NULL)
    127                 goto fail;
    128             err = PyList_Append(list, item);
    129             Py_DECREF(item);
    130             if (err < 0)
    131                 goto fail;
    132             i = j = i + n;
    133             splitcount++;
    134             if (maxsplit && (splitcount >= maxsplit))
    135                 break;
    136         }
    137         else
    138             i++;
    139     }
    140     item = PyString_FromStringAndSize(s+j, len-j);
    141     if (item == NULL)
    142         goto fail;
    143     err = PyList_Append(list, item);
    144     Py_DECREF(item);
    145     if (err < 0)
    146         goto fail;
    147 
    148     return list;
    149 
    150  fail:
    151     Py_DECREF(list);
    152     return NULL;
    153 }
    154 
    155 
    156 PyDoc_STRVAR(joinfields__doc__,
    157 "join(list [,sep]) -> string\n"
    158 "joinfields(list [,sep]) -> string\n"
    159 "\n"
    160 "Return a string composed of the words in list, with\n"
    161 "intervening occurrences of sep.  Sep defaults to a single\n"
    162 "space.\n"
    163 "\n"
    164 "(join and joinfields are synonymous)");
    165 
    166 static PyObject *
    167 strop_joinfields(PyObject *self, PyObject *args)
    168 {
    169     PyObject *seq;
    170     char *sep = NULL;
    171     Py_ssize_t seqlen, seplen = 0;
    172     Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
    173     PyObject *res = NULL;
    174     char* p = NULL;
    175     ssizeargfunc getitemfunc;
    176 
    177     WARN;
    178     if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
    179         return NULL;
    180     if (sep == NULL) {
    181         sep = " ";
    182         seplen = 1;
    183     }
    184 
    185     seqlen = PySequence_Size(seq);
    186     if (seqlen < 0 && PyErr_Occurred())
    187         return NULL;
    188 
    189     if (seqlen == 1) {
    190         /* Optimization if there's only one item */
    191         PyObject *item = PySequence_GetItem(seq, 0);
    192         if (item && !PyString_Check(item)) {
    193             PyErr_SetString(PyExc_TypeError,
    194                      "first argument must be sequence of strings");
    195             Py_DECREF(item);
    196             return NULL;
    197         }
    198         return item;
    199     }
    200 
    201     if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
    202         return NULL;
    203     p = PyString_AsString(res);
    204 
    205     /* optimize for lists, since it's the most common case.  all others
    206      * (tuples and arbitrary sequences) just use the sequence abstract
    207      * interface.
    208      */
    209     if (PyList_Check(seq)) {
    210         for (i = 0; i < seqlen; i++) {
    211             PyObject *item = PyList_GET_ITEM(seq, i);
    212             if (!PyString_Check(item)) {
    213                 PyErr_SetString(PyExc_TypeError,
    214                 "first argument must be sequence of strings");
    215                 Py_DECREF(res);
    216                 return NULL;
    217             }
    218             slen = PyString_GET_SIZE(item);
    219             if (slen > PY_SSIZE_T_MAX - reslen ||
    220                 seplen > PY_SSIZE_T_MAX - reslen - seplen) {
    221                 PyErr_SetString(PyExc_OverflowError,
    222                                 "input too long");
    223                 Py_DECREF(res);
    224                 return NULL;
    225             }
    226             while (reslen + slen + seplen >= sz) {
    227                 if (_PyString_Resize(&res, sz * 2) < 0)
    228                     return NULL;
    229                 sz *= 2;
    230                 p = PyString_AsString(res) + reslen;
    231             }
    232             if (i > 0) {
    233                 memcpy(p, sep, seplen);
    234                 p += seplen;
    235                 reslen += seplen;
    236             }
    237             memcpy(p, PyString_AS_STRING(item), slen);
    238             p += slen;
    239             reslen += slen;
    240         }
    241         _PyString_Resize(&res, reslen);
    242         return res;
    243     }
    244 
    245     if (seq->ob_type->tp_as_sequence == NULL ||
    246              (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
    247     {
    248         PyErr_SetString(PyExc_TypeError,
    249                         "first argument must be a sequence");
    250         return NULL;
    251     }
    252     /* This is now type safe */
    253     for (i = 0; i < seqlen; i++) {
    254         PyObject *item = getitemfunc(seq, i);
    255         if (!item || !PyString_Check(item)) {
    256             PyErr_SetString(PyExc_TypeError,
    257                      "first argument must be sequence of strings");
    258             Py_DECREF(res);
    259             Py_XDECREF(item);
    260             return NULL;
    261         }
    262         slen = PyString_GET_SIZE(item);
    263         if (slen > PY_SSIZE_T_MAX - reslen ||
    264             seplen > PY_SSIZE_T_MAX - reslen - seplen) {
    265             PyErr_SetString(PyExc_OverflowError,
    266                             "input too long");
    267             Py_DECREF(res);
    268             Py_XDECREF(item);
    269             return NULL;
    270         }
    271         while (reslen + slen + seplen >= sz) {
    272             if (_PyString_Resize(&res, sz * 2) < 0) {
    273                 Py_DECREF(item);
    274                 return NULL;
    275             }
    276             sz *= 2;
    277             p = PyString_AsString(res) + reslen;
    278         }
    279         if (i > 0) {
    280             memcpy(p, sep, seplen);
    281             p += seplen;
    282             reslen += seplen;
    283         }
    284         memcpy(p, PyString_AS_STRING(item), slen);
    285         p += slen;
    286         reslen += slen;
    287         Py_DECREF(item);
    288     }
    289     _PyString_Resize(&res, reslen);
    290     return res;
    291 }
    292 
    293 
    294 PyDoc_STRVAR(find__doc__,
    295 "find(s, sub [,start [,end]]) -> in\n"
    296 "\n"
    297 "Return the lowest index in s where substring sub is found,\n"
    298 "such that sub is contained within s[start,end].  Optional\n"
    299 "arguments start and end are interpreted as in slice notation.\n"
    300 "\n"
    301 "Return -1 on failure.");
    302 
    303 static PyObject *
    304 strop_find(PyObject *self, PyObject *args)
    305 {
    306     char *s, *sub;
    307     Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
    308 
    309     WARN;
    310     if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
    311         return NULL;
    312 
    313     if (last > len)
    314         last = len;
    315     if (last < 0)
    316         last += len;
    317     if (last < 0)
    318         last = 0;
    319     if (i < 0)
    320         i += len;
    321     if (i < 0)
    322         i = 0;
    323 
    324     if (n == 0 && i <= last)
    325         return PyInt_FromLong((long)i);
    326 
    327     last -= n;
    328     for (; i <= last; ++i)
    329         if (s[i] == sub[0] &&
    330             (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
    331             return PyInt_FromLong((long)i);
    332 
    333     return PyInt_FromLong(-1L);
    334 }
    335 
    336 
    337 PyDoc_STRVAR(rfind__doc__,
    338 "rfind(s, sub [,start [,end]]) -> int\n"
    339 "\n"
    340 "Return the highest index in s where substring sub is found,\n"
    341 "such that sub is contained within s[start,end].  Optional\n"
    342 "arguments start and end are interpreted as in slice notation.\n"
    343 "\n"
    344 "Return -1 on failure.");
    345 
    346 static PyObject *
    347 strop_rfind(PyObject *self, PyObject *args)
    348 {
    349     char *s, *sub;
    350     Py_ssize_t len, n, j;
    351     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
    352 
    353     WARN;
    354     if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
    355         return NULL;
    356 
    357     if (last > len)
    358         last = len;
    359     if (last < 0)
    360         last += len;
    361     if (last < 0)
    362         last = 0;
    363     if (i < 0)
    364         i += len;
    365     if (i < 0)
    366         i = 0;
    367 
    368     if (n == 0 && i <= last)
    369         return PyInt_FromLong((long)last);
    370 
    371     for (j = last-n; j >= i; --j)
    372         if (s[j] == sub[0] &&
    373             (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
    374             return PyInt_FromLong((long)j);
    375 
    376     return PyInt_FromLong(-1L);
    377 }
    378 
    379 
    380 static PyObject *
    381 do_strip(PyObject *args, int striptype)
    382 {
    383     char *s;
    384     Py_ssize_t len, i, j;
    385 
    386 
    387     if (PyString_AsStringAndSize(args, &s, &len))
    388         return NULL;
    389 
    390     i = 0;
    391     if (striptype != RIGHTSTRIP) {
    392         while (i < len && isspace(Py_CHARMASK(s[i]))) {
    393             i++;
    394         }
    395     }
    396 
    397     j = len;
    398     if (striptype != LEFTSTRIP) {
    399         do {
    400             j--;
    401         } while (j >= i && isspace(Py_CHARMASK(s[j])));
    402         j++;
    403     }
    404 
    405     if (i == 0 && j == len) {
    406         Py_INCREF(args);
    407         return args;
    408     }
    409     else
    410         return PyString_FromStringAndSize(s+i, j-i);
    411 }
    412 
    413 
    414 PyDoc_STRVAR(strip__doc__,
    415 "strip(s) -> string\n"
    416 "\n"
    417 "Return a copy of the string s with leading and trailing\n"
    418 "whitespace removed.");
    419 
    420 static PyObject *
    421 strop_strip(PyObject *self, PyObject *args)
    422 {
    423     WARN;
    424     return do_strip(args, BOTHSTRIP);
    425 }
    426 
    427 
    428 PyDoc_STRVAR(lstrip__doc__,
    429 "lstrip(s) -> string\n"
    430 "\n"
    431 "Return a copy of the string s with leading whitespace removed.");
    432 
    433 static PyObject *
    434 strop_lstrip(PyObject *self, PyObject *args)
    435 {
    436     WARN;
    437     return do_strip(args, LEFTSTRIP);
    438 }
    439 
    440 
    441 PyDoc_STRVAR(rstrip__doc__,
    442 "rstrip(s) -> string\n"
    443 "\n"
    444 "Return a copy of the string s with trailing whitespace removed.");
    445 
    446 static PyObject *
    447 strop_rstrip(PyObject *self, PyObject *args)
    448 {
    449     WARN;
    450     return do_strip(args, RIGHTSTRIP);
    451 }
    452 
    453 
    454 PyDoc_STRVAR(lower__doc__,
    455 "lower(s) -> string\n"
    456 "\n"
    457 "Return a copy of the string s converted to lowercase.");
    458 
    459 static PyObject *
    460 strop_lower(PyObject *self, PyObject *args)
    461 {
    462     char *s, *s_new;
    463     Py_ssize_t i, n;
    464     PyObject *newstr;
    465     int changed;
    466 
    467     WARN;
    468     if (PyString_AsStringAndSize(args, &s, &n))
    469         return NULL;
    470     newstr = PyString_FromStringAndSize(NULL, n);
    471     if (newstr == NULL)
    472         return NULL;
    473     s_new = PyString_AsString(newstr);
    474     changed = 0;
    475     for (i = 0; i < n; i++) {
    476         int c = Py_CHARMASK(*s++);
    477         if (isupper(c)) {
    478             changed = 1;
    479             *s_new = tolower(c);
    480         } else
    481             *s_new = c;
    482         s_new++;
    483     }
    484     if (!changed) {
    485         Py_DECREF(newstr);
    486         Py_INCREF(args);
    487         return args;
    488     }
    489     return newstr;
    490 }
    491 
    492 
    493 PyDoc_STRVAR(upper__doc__,
    494 "upper(s) -> string\n"
    495 "\n"
    496 "Return a copy of the string s converted to uppercase.");
    497 
    498 static PyObject *
    499 strop_upper(PyObject *self, PyObject *args)
    500 {
    501     char *s, *s_new;
    502     Py_ssize_t i, n;
    503     PyObject *newstr;
    504     int changed;
    505 
    506     WARN;
    507     if (PyString_AsStringAndSize(args, &s, &n))
    508         return NULL;
    509     newstr = PyString_FromStringAndSize(NULL, n);
    510     if (newstr == NULL)
    511         return NULL;
    512     s_new = PyString_AsString(newstr);
    513     changed = 0;
    514     for (i = 0; i < n; i++) {
    515         int c = Py_CHARMASK(*s++);
    516         if (islower(c)) {
    517             changed = 1;
    518             *s_new = toupper(c);
    519         } else
    520             *s_new = c;
    521         s_new++;
    522     }
    523     if (!changed) {
    524         Py_DECREF(newstr);
    525         Py_INCREF(args);
    526         return args;
    527     }
    528     return newstr;
    529 }
    530 
    531 
    532 PyDoc_STRVAR(capitalize__doc__,
    533 "capitalize(s) -> string\n"
    534 "\n"
    535 "Return a copy of the string s with only its first character\n"
    536 "capitalized.");
    537 
    538 static PyObject *
    539 strop_capitalize(PyObject *self, PyObject *args)
    540 {
    541     char *s, *s_new;
    542     Py_ssize_t i, n;
    543     PyObject *newstr;
    544     int changed;
    545 
    546     WARN;
    547     if (PyString_AsStringAndSize(args, &s, &n))
    548         return NULL;
    549     newstr = PyString_FromStringAndSize(NULL, n);
    550     if (newstr == NULL)
    551         return NULL;
    552     s_new = PyString_AsString(newstr);
    553     changed = 0;
    554     if (0 < n) {
    555         int c = Py_CHARMASK(*s++);
    556         if (islower(c)) {
    557             changed = 1;
    558             *s_new = toupper(c);
    559         } else
    560             *s_new = c;
    561         s_new++;
    562     }
    563     for (i = 1; i < n; i++) {
    564         int c = Py_CHARMASK(*s++);
    565         if (isupper(c)) {
    566             changed = 1;
    567             *s_new = tolower(c);
    568         } else
    569             *s_new = c;
    570         s_new++;
    571     }
    572     if (!changed) {
    573         Py_DECREF(newstr);
    574         Py_INCREF(args);
    575         return args;
    576     }
    577     return newstr;
    578 }
    579 
    580 
    581 PyDoc_STRVAR(expandtabs__doc__,
    582 "expandtabs(string, [tabsize]) -> string\n"
    583 "\n"
    584 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
    585 "depending on the current column and the given tab size (default 8).\n"
    586 "The column number is reset to zero after each newline occurring in the\n"
    587 "string.  This doesn't understand other non-printing characters.");
    588 
    589 static PyObject *
    590 strop_expandtabs(PyObject *self, PyObject *args)
    591 {
    592     /* Original by Fredrik Lundh */
    593     char* e;
    594     char* p;
    595     char* q;
    596     Py_ssize_t i, j, old_j;
    597     PyObject* out;
    598     char* string;
    599     Py_ssize_t stringlen;
    600     int tabsize = 8;
    601 
    602     WARN;
    603     /* Get arguments */
    604     if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
    605         return NULL;
    606     if (tabsize < 1) {
    607         PyErr_SetString(PyExc_ValueError,
    608                         "tabsize must be at least 1");
    609         return NULL;
    610     }
    611 
    612     /* First pass: determine size of output string */
    613     i = j = old_j = 0; /* j: current column; i: total of previous lines */
    614     e = string + stringlen;
    615     for (p = string; p < e; p++) {
    616         if (*p == '\t') {
    617             j += tabsize - (j%tabsize);
    618             if (old_j > j) {
    619                 PyErr_SetString(PyExc_OverflowError,
    620                                 "new string is too long");
    621                 return NULL;
    622             }
    623             old_j = j;
    624         } else {
    625             j++;
    626             if (*p == '\n') {
    627                 i += j;
    628                 j = 0;
    629             }
    630         }
    631     }
    632 
    633     if ((i + j) < 0) {
    634         PyErr_SetString(PyExc_OverflowError, "new string is too long");
    635         return NULL;
    636     }
    637 
    638     /* Second pass: create output string and fill it */
    639     out = PyString_FromStringAndSize(NULL, i+j);
    640     if (out == NULL)
    641         return NULL;
    642 
    643     i = 0;
    644     q = PyString_AS_STRING(out);
    645 
    646     for (p = string; p < e; p++) {
    647         if (*p == '\t') {
    648             j = tabsize - (i%tabsize);
    649             i += j;
    650             while (j-- > 0)
    651                 *q++ = ' ';
    652         } else {
    653             *q++ = *p;
    654             i++;
    655             if (*p == '\n')
    656                 i = 0;
    657         }
    658     }
    659 
    660     return out;
    661 }
    662 
    663 
    664 PyDoc_STRVAR(count__doc__,
    665 "count(s, sub[, start[, end]]) -> int\n"
    666 "\n"
    667 "Return the number of occurrences of substring sub in string\n"
    668 "s[start:end].  Optional arguments start and end are\n"
    669 "interpreted as in slice notation.");
    670 
    671 static PyObject *
    672 strop_count(PyObject *self, PyObject *args)
    673 {
    674     char *s, *sub;
    675     Py_ssize_t len, n;
    676     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
    677     Py_ssize_t m, r;
    678 
    679     WARN;
    680     if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
    681         return NULL;
    682     if (last > len)
    683         last = len;
    684     if (last < 0)
    685         last += len;
    686     if (last < 0)
    687         last = 0;
    688     if (i < 0)
    689         i += len;
    690     if (i < 0)
    691         i = 0;
    692     m = last + 1 - n;
    693     if (n == 0)
    694         return PyInt_FromLong((long) (m-i));
    695 
    696     r = 0;
    697     while (i < m) {
    698         if (!memcmp(s+i, sub, n)) {
    699             r++;
    700             i += n;
    701         } else {
    702             i++;
    703         }
    704     }
    705     return PyInt_FromLong((long) r);
    706 }
    707 
    708 
    709 PyDoc_STRVAR(swapcase__doc__,
    710 "swapcase(s) -> string\n"
    711 "\n"
    712 "Return a copy of the string s with upper case characters\n"
    713 "converted to lowercase and vice versa.");
    714 
    715 static PyObject *
    716 strop_swapcase(PyObject *self, PyObject *args)
    717 {
    718     char *s, *s_new;
    719     Py_ssize_t i, n;
    720     PyObject *newstr;
    721     int changed;
    722 
    723     WARN;
    724     if (PyString_AsStringAndSize(args, &s, &n))
    725         return NULL;
    726     newstr = PyString_FromStringAndSize(NULL, n);
    727     if (newstr == NULL)
    728         return NULL;
    729     s_new = PyString_AsString(newstr);
    730     changed = 0;
    731     for (i = 0; i < n; i++) {
    732         int c = Py_CHARMASK(*s++);
    733         if (islower(c)) {
    734             changed = 1;
    735             *s_new = toupper(c);
    736         }
    737         else if (isupper(c)) {
    738             changed = 1;
    739             *s_new = tolower(c);
    740         }
    741         else
    742             *s_new = c;
    743         s_new++;
    744     }
    745     if (!changed) {
    746         Py_DECREF(newstr);
    747         Py_INCREF(args);
    748         return args;
    749     }
    750     return newstr;
    751 }
    752 
    753 
    754 PyDoc_STRVAR(atoi__doc__,
    755 "atoi(s [,base]) -> int\n"
    756 "\n"
    757 "Return the integer represented by the string s in the given\n"
    758 "base, which defaults to 10.  The string s must consist of one\n"
    759 "or more digits, possibly preceded by a sign.  If base is 0, it\n"
    760 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
    761 "0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
    762 "accepted.");
    763 
    764 static PyObject *
    765 strop_atoi(PyObject *self, PyObject *args)
    766 {
    767     char *s, *end;
    768     int base = 10;
    769     long x;
    770     char buffer[256]; /* For errors */
    771 
    772     WARN;
    773     if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
    774         return NULL;
    775 
    776     if ((base != 0 && base < 2) || base > 36) {
    777         PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
    778         return NULL;
    779     }
    780 
    781     while (*s && isspace(Py_CHARMASK(*s)))
    782         s++;
    783     errno = 0;
    784     if (base == 0 && s[0] == '0')
    785         x = (long) PyOS_strtoul(s, &end, base);
    786     else
    787         x = PyOS_strtol(s, &end, base);
    788     if (end == s || !isalnum(Py_CHARMASK(end[-1])))
    789         goto bad;
    790     while (*end && isspace(Py_CHARMASK(*end)))
    791         end++;
    792     if (*end != '\0') {
    793   bad:
    794         PyOS_snprintf(buffer, sizeof(buffer),
    795                       "invalid literal for atoi(): %.200s", s);
    796         PyErr_SetString(PyExc_ValueError, buffer);
    797         return NULL;
    798     }
    799     else if (errno != 0) {
    800         PyOS_snprintf(buffer, sizeof(buffer),
    801                       "atoi() literal too large: %.200s", s);
    802         PyErr_SetString(PyExc_ValueError, buffer);
    803         return NULL;
    804     }
    805     return PyInt_FromLong(x);
    806 }
    807 
    808 
    809 PyDoc_STRVAR(atol__doc__,
    810 "atol(s [,base]) -> long\n"
    811 "\n"
    812 "Return the long integer represented by the string s in the\n"
    813 "given base, which defaults to 10.  The string s must consist\n"
    814 "of one or more digits, possibly preceded by a sign.  If base\n"
    815 "is 0, it is chosen from the leading characters of s, 0 for\n"
    816 "octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
    817 "0x or 0X is accepted.  A trailing L or l is not accepted,\n"
    818 "unless base is 0.");
    819 
    820 static PyObject *
    821 strop_atol(PyObject *self, PyObject *args)
    822 {
    823     char *s, *end;
    824     int base = 10;
    825     PyObject *x;
    826     char buffer[256]; /* For errors */
    827 
    828     WARN;
    829     if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
    830         return NULL;
    831 
    832     if ((base != 0 && base < 2) || base > 36) {
    833         PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
    834         return NULL;
    835     }
    836 
    837     while (*s && isspace(Py_CHARMASK(*s)))
    838         s++;
    839     if (s[0] == '\0') {
    840         PyErr_SetString(PyExc_ValueError, "empty string for atol()");
    841         return NULL;
    842     }
    843     x = PyLong_FromString(s, &end, base);
    844     if (x == NULL)
    845         return NULL;
    846     if (base == 0 && (*end == 'l' || *end == 'L'))
    847         end++;
    848     while (*end && isspace(Py_CHARMASK(*end)))
    849         end++;
    850     if (*end != '\0') {
    851         PyOS_snprintf(buffer, sizeof(buffer),
    852                       "invalid literal for atol(): %.200s", s);
    853         PyErr_SetString(PyExc_ValueError, buffer);
    854         Py_DECREF(x);
    855         return NULL;
    856     }
    857     return x;
    858 }
    859 
    860 
    861 PyDoc_STRVAR(atof__doc__,
    862 "atof(s) -> float\n"
    863 "\n"
    864 "Return the floating point number represented by the string s.");
    865 
    866 static PyObject *
    867 strop_atof(PyObject *self, PyObject *args)
    868 {
    869     char *s, *end;
    870     double x;
    871     char buffer[256]; /* For errors */
    872 
    873     WARN;
    874     if (!PyArg_ParseTuple(args, "s:atof", &s))
    875         return NULL;
    876     while (*s && isspace(Py_CHARMASK(*s)))
    877         s++;
    878     if (s[0] == '\0') {
    879         PyErr_SetString(PyExc_ValueError, "empty string for atof()");
    880         return NULL;
    881     }
    882 
    883     PyFPE_START_PROTECT("strop_atof", return 0)
    884     x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
    885     PyFPE_END_PROTECT(x)
    886     if (x == -1 && PyErr_Occurred())
    887         return NULL;
    888     while (*end && isspace(Py_CHARMASK(*end)))
    889         end++;
    890     if (*end != '\0') {
    891         PyOS_snprintf(buffer, sizeof(buffer),
    892                       "invalid literal for atof(): %.200s", s);
    893         PyErr_SetString(PyExc_ValueError, buffer);
    894         return NULL;
    895     }
    896     return PyFloat_FromDouble(x);
    897 }
    898 
    899 
    900 PyDoc_STRVAR(maketrans__doc__,
    901 "maketrans(frm, to) -> string\n"
    902 "\n"
    903 "Return a translation table (a string of 256 bytes long)\n"
    904 "suitable for use in string.translate.  The strings frm and to\n"
    905 "must be of the same length.");
    906 
    907 static PyObject *
    908 strop_maketrans(PyObject *self, PyObject *args)
    909 {
    910     unsigned char *c, *from=NULL, *to=NULL;
    911     Py_ssize_t i, fromlen=0, tolen=0;
    912     PyObject *result;
    913 
    914     if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
    915         return NULL;
    916 
    917     if (fromlen != tolen) {
    918         PyErr_SetString(PyExc_ValueError,
    919                         "maketrans arguments must have same length");
    920         return NULL;
    921     }
    922 
    923     result = PyString_FromStringAndSize((char *)NULL, 256);
    924     if (result == NULL)
    925         return NULL;
    926     c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
    927     for (i = 0; i < 256; i++)
    928         c[i]=(unsigned char)i;
    929     for (i = 0; i < fromlen; i++)
    930         c[from[i]]=to[i];
    931 
    932     return result;
    933 }
    934 
    935 
    936 PyDoc_STRVAR(translate__doc__,
    937 "translate(s,table [,deletechars]) -> string\n"
    938 "\n"
    939 "Return a copy of the string s, where all characters occurring\n"
    940 "in the optional argument deletechars are removed, and the\n"
    941 "remaining characters have been mapped through the given\n"
    942 "translation table, which must be a string of length 256.");
    943 
    944 static PyObject *
    945 strop_translate(PyObject *self, PyObject *args)
    946 {
    947     register char *input, *table, *output;
    948     Py_ssize_t i;
    949     int c, changed = 0;
    950     PyObject *input_obj;
    951     char *table1, *output_start, *del_table=NULL;
    952     Py_ssize_t inlen, tablen, dellen = 0;
    953     PyObject *result;
    954     int trans_table[256];
    955 
    956     WARN;
    957     if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
    958                           &table1, &tablen, &del_table, &dellen))
    959         return NULL;
    960     if (tablen != 256) {
    961         PyErr_SetString(PyExc_ValueError,
    962                       "translation table must be 256 characters long");
    963         return NULL;
    964     }
    965 
    966     table = table1;
    967     inlen = PyString_GET_SIZE(input_obj);
    968     result = PyString_FromStringAndSize((char *)NULL, inlen);
    969     if (result == NULL)
    970         return NULL;
    971     output_start = output = PyString_AsString(result);
    972     input = PyString_AsString(input_obj);
    973 
    974     if (dellen == 0) {
    975         /* If no deletions are required, use faster code */
    976         for (i = inlen; --i >= 0; ) {
    977             c = Py_CHARMASK(*input++);
    978             if (Py_CHARMASK((*output++ = table[c])) != c)
    979                 changed = 1;
    980         }
    981         if (changed)
    982             return result;
    983         Py_DECREF(result);
    984         Py_INCREF(input_obj);
    985         return input_obj;
    986     }
    987 
    988     for (i = 0; i < 256; i++)
    989         trans_table[i] = Py_CHARMASK(table[i]);
    990 
    991     for (i = 0; i < dellen; i++)
    992         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
    993 
    994     for (i = inlen; --i >= 0; ) {
    995         c = Py_CHARMASK(*input++);
    996         if (trans_table[c] != -1)
    997             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
    998                 continue;
    999         changed = 1;
   1000     }
   1001     if (!changed) {
   1002         Py_DECREF(result);
   1003         Py_INCREF(input_obj);
   1004         return input_obj;
   1005     }
   1006     /* Fix the size of the resulting string */
   1007     if (inlen > 0)
   1008         _PyString_Resize(&result, output - output_start);
   1009     return result;
   1010 }
   1011 
   1012 
   1013 /* What follows is used for implementing replace().  Perry Stoll. */
   1014 
   1015 /*
   1016   mymemfind
   1017 
   1018   strstr replacement for arbitrary blocks of memory.
   1019 
   1020   Locates the first occurrence in the memory pointed to by MEM of the
   1021   contents of memory pointed to by PAT.  Returns the index into MEM if
   1022   found, or -1 if not found.  If len of PAT is greater than length of
   1023   MEM, the function returns -1.
   1024 */
   1025 static Py_ssize_t
   1026 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
   1027 {
   1028     register Py_ssize_t ii;
   1029 
   1030     /* pattern can not occur in the last pat_len-1 chars */
   1031     len -= pat_len;
   1032 
   1033     for (ii = 0; ii <= len; ii++) {
   1034         if (mem[ii] == pat[0] &&
   1035             (pat_len == 1 ||
   1036              memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
   1037             return ii;
   1038         }
   1039     }
   1040     return -1;
   1041 }
   1042 
   1043 /*
   1044   mymemcnt
   1045 
   1046    Return the number of distinct times PAT is found in MEM.
   1047    meaning mem=1111 and pat==11 returns 2.
   1048        mem=11111 and pat==11 also return 2.
   1049  */
   1050 static Py_ssize_t
   1051 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
   1052 {
   1053     register Py_ssize_t offset = 0;
   1054     Py_ssize_t nfound = 0;
   1055 
   1056     while (len >= 0) {
   1057         offset = mymemfind(mem, len, pat, pat_len);
   1058         if (offset == -1)
   1059             break;
   1060         mem += offset + pat_len;
   1061         len -= offset + pat_len;
   1062         nfound++;
   1063     }
   1064     return nfound;
   1065 }
   1066 
   1067 /*
   1068    mymemreplace
   1069 
   1070    Return a string in which all occurrences of PAT in memory STR are
   1071    replaced with SUB.
   1072 
   1073    If length of PAT is less than length of STR or there are no occurrences
   1074    of PAT in STR, then the original string is returned. Otherwise, a new
   1075    string is allocated here and returned.
   1076 
   1077    on return, out_len is:
   1078        the length of output string, or
   1079        -1 if the input string is returned, or
   1080        unchanged if an error occurs (no memory).
   1081 
   1082    return value is:
   1083        the new string allocated locally, or
   1084        NULL if an error occurred.
   1085 */
   1086 static char *
   1087 mymemreplace(const char *str, Py_ssize_t len,           /* input string */
   1088          const char *pat, Py_ssize_t pat_len,           /* pattern string to find */
   1089          const char *sub, Py_ssize_t sub_len,           /* substitution string */
   1090          Py_ssize_t count,                              /* number of replacements */
   1091          Py_ssize_t *out_len)
   1092 {
   1093     char *out_s;
   1094     char *new_s;
   1095     Py_ssize_t nfound, offset, new_len;
   1096 
   1097     if (len == 0 || pat_len > len)
   1098         goto return_same;
   1099 
   1100     /* find length of output string */
   1101     nfound = mymemcnt(str, len, pat, pat_len);
   1102     if (count < 0)
   1103         count = PY_SSIZE_T_MAX;
   1104     else if (nfound > count)
   1105         nfound = count;
   1106     if (nfound == 0)
   1107         goto return_same;
   1108 
   1109     new_len = len + nfound*(sub_len - pat_len);
   1110     if (new_len == 0) {
   1111         /* Have to allocate something for the caller to free(). */
   1112         out_s = (char *)PyMem_MALLOC(1);
   1113         if (out_s == NULL)
   1114             return NULL;
   1115         out_s[0] = '\0';
   1116     }
   1117     else {
   1118         assert(new_len > 0);
   1119         new_s = (char *)PyMem_MALLOC(new_len);
   1120         if (new_s == NULL)
   1121             return NULL;
   1122         out_s = new_s;
   1123 
   1124         for (; count > 0 && len > 0; --count) {
   1125             /* find index of next instance of pattern */
   1126             offset = mymemfind(str, len, pat, pat_len);
   1127             if (offset == -1)
   1128                 break;
   1129 
   1130             /* copy non matching part of input string */
   1131             memcpy(new_s, str, offset);
   1132             str += offset + pat_len;
   1133             len -= offset + pat_len;
   1134 
   1135             /* copy substitute into the output string */
   1136             new_s += offset;
   1137             memcpy(new_s, sub, sub_len);
   1138             new_s += sub_len;
   1139         }
   1140         /* copy any remaining values into output string */
   1141         if (len > 0)
   1142             memcpy(new_s, str, len);
   1143     }
   1144     *out_len = new_len;
   1145     return out_s;
   1146 
   1147   return_same:
   1148     *out_len = -1;
   1149     return (char *)str; /* cast away const */
   1150 }
   1151 
   1152 
   1153 PyDoc_STRVAR(replace__doc__,
   1154 "replace (str, old, new[, maxsplit]) -> string\n"
   1155 "\n"
   1156 "Return a copy of string str with all occurrences of substring\n"
   1157 "old replaced by new. If the optional argument maxsplit is\n"
   1158 "given, only the first maxsplit occurrences are replaced.");
   1159 
   1160 static PyObject *
   1161 strop_replace(PyObject *self, PyObject *args)
   1162 {
   1163     char *str, *pat,*sub,*new_s;
   1164     Py_ssize_t len,pat_len,sub_len,out_len;
   1165     Py_ssize_t count = -1;
   1166     PyObject *newstr;
   1167 
   1168     WARN;
   1169     if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
   1170                           &str, &len, &pat, &pat_len, &sub, &sub_len,
   1171                           &count))
   1172         return NULL;
   1173     if (pat_len <= 0) {
   1174         PyErr_SetString(PyExc_ValueError, "empty pattern string");
   1175         return NULL;
   1176     }
   1177     /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
   1178      * current (2.1) string.py and string methods.  Preserve this for
   1179      * ... well, hard to say for what <wink>.
   1180      */
   1181     if (count == 0)
   1182         count = -1;
   1183     new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
   1184     if (new_s == NULL) {
   1185         PyErr_NoMemory();
   1186         return NULL;
   1187     }
   1188     if (out_len == -1) {
   1189         /* we're returning another reference to the input string */
   1190         newstr = PyTuple_GetItem(args, 0);
   1191         Py_XINCREF(newstr);
   1192     }
   1193     else {
   1194         newstr = PyString_FromStringAndSize(new_s, out_len);
   1195         PyMem_FREE(new_s);
   1196     }
   1197     return newstr;
   1198 }
   1199 
   1200 
   1201 /* List of functions defined in the module */
   1202 
   1203 static PyMethodDef
   1204 strop_methods[] = {
   1205     {"atof",            strop_atof,        METH_VARARGS, atof__doc__},
   1206     {"atoi",            strop_atoi,        METH_VARARGS, atoi__doc__},
   1207     {"atol",            strop_atol,        METH_VARARGS, atol__doc__},
   1208     {"capitalize",      strop_capitalize,  METH_O,       capitalize__doc__},
   1209     {"count",           strop_count,       METH_VARARGS, count__doc__},
   1210     {"expandtabs",      strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
   1211     {"find",            strop_find,        METH_VARARGS, find__doc__},
   1212     {"join",            strop_joinfields,  METH_VARARGS, joinfields__doc__},
   1213     {"joinfields",      strop_joinfields,  METH_VARARGS, joinfields__doc__},
   1214     {"lstrip",          strop_lstrip,      METH_O,       lstrip__doc__},
   1215     {"lower",           strop_lower,       METH_O,       lower__doc__},
   1216     {"maketrans",       strop_maketrans,   METH_VARARGS, maketrans__doc__},
   1217     {"replace",         strop_replace,     METH_VARARGS, replace__doc__},
   1218     {"rfind",           strop_rfind,       METH_VARARGS, rfind__doc__},
   1219     {"rstrip",          strop_rstrip,      METH_O,       rstrip__doc__},
   1220     {"split",           strop_splitfields, METH_VARARGS, splitfields__doc__},
   1221     {"splitfields",     strop_splitfields, METH_VARARGS, splitfields__doc__},
   1222     {"strip",           strop_strip,       METH_O,       strip__doc__},
   1223     {"swapcase",        strop_swapcase,    METH_O,       swapcase__doc__},
   1224     {"translate",       strop_translate,   METH_VARARGS, translate__doc__},
   1225     {"upper",           strop_upper,       METH_O,       upper__doc__},
   1226     {NULL,              NULL}   /* sentinel */
   1227 };
   1228 
   1229 
   1230 PyMODINIT_FUNC
   1231 initstrop(void)
   1232 {
   1233     PyObject *m, *s;
   1234     char buf[256];
   1235     int c, n;
   1236     m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
   1237                        (PyObject*)NULL, PYTHON_API_VERSION);
   1238     if (m == NULL)
   1239         return;
   1240 
   1241     /* Create 'whitespace' object */
   1242     n = 0;
   1243     for (c = 0; c < 256; c++) {
   1244         if (isspace(c))
   1245             buf[n++] = c;
   1246     }
   1247     s = PyString_FromStringAndSize(buf, n);
   1248     if (s)
   1249         PyModule_AddObject(m, "whitespace", s);
   1250 
   1251     /* Create 'lowercase' object */
   1252     n = 0;
   1253     for (c = 0; c < 256; c++) {
   1254         if (islower(c))
   1255             buf[n++] = c;
   1256     }
   1257     s = PyString_FromStringAndSize(buf, n);
   1258     if (s)
   1259         PyModule_AddObject(m, "lowercase", s);
   1260 
   1261     /* Create 'uppercase' object */
   1262     n = 0;
   1263     for (c = 0; c < 256; c++) {
   1264         if (isupper(c))
   1265             buf[n++] = c;
   1266     }
   1267     s = PyString_FromStringAndSize(buf, n);
   1268     if (s)
   1269         PyModule_AddObject(m, "uppercase", s);
   1270 }
   1271