Home | History | Annotate | Download | only in Modules
      1 /* strop module */
      2 
      3 #define PY_SSIZE_T_CLEAN
      4 #include "Python.h"
      5 #include <ctype.h>
      6 
      7 PyDoc_STRVAR(strop_module__doc__,
      8 "Common string manipulations, optimized for speed.\n"
      9 "\n"
     10 "Always use \"import string\" rather than referencing\n"
     11 "this module directly.");
     12 
     13 /* XXX This file assumes that the <ctype.h> is*() functions
     14    XXX are defined for all 8-bit characters! */
     15 
     16 #define WARN if (PyErr_Warn(PyExc_DeprecationWarning, \
     17                "strop functions are obsolete; use string methods")) \
     18          return NULL
     19 
     20 /* The lstrip(), rstrip() and strip() functions are implemented
     21    in do_strip(), which uses an additional parameter to indicate what
     22    type of strip should occur. */
     23 
     24 #define LEFTSTRIP 0
     25 #define RIGHTSTRIP 1
     26 #define BOTHSTRIP 2
     27 
     28 
     29 static PyObject *
     30 split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
     31 {
     32     Py_ssize_t i = 0, j;
     33     int err;
     34     Py_ssize_t countsplit = 0;
     35     PyObject* item;
     36     PyObject *list = PyList_New(0);
     37 
     38     if (list == NULL)
     39         return NULL;
     40 
     41     while (i < len) {
     42         while (i < len && isspace(Py_CHARMASK(s[i]))) {
     43             i = i+1;
     44         }
     45         j = i;
     46         while (i < len && !isspace(Py_CHARMASK(s[i]))) {
     47             i = i+1;
     48         }
     49         if (j < i) {
     50             item = PyString_FromStringAndSize(s+j, i-j);
     51             if (item == NULL)
     52                 goto finally;
     53 
     54             err = PyList_Append(list, item);
     55             Py_DECREF(item);
     56             if (err < 0)
     57                 goto finally;
     58 
     59             countsplit++;
     60             while (i < len && isspace(Py_CHARMASK(s[i]))) {
     61                 i = i+1;
     62             }
     63             if (maxsplit && (countsplit >= maxsplit) && i < len) {
     64                 item = PyString_FromStringAndSize(
     65                     s+i, len - i);
     66                 if (item == NULL)
     67                     goto finally;
     68 
     69                 err = PyList_Append(list, item);
     70                 Py_DECREF(item);
     71                 if (err < 0)
     72                     goto finally;
     73 
     74                 i = len;
     75             }
     76         }
     77     }
     78     return list;
     79   finally:
     80     Py_DECREF(list);
     81     return NULL;
     82 }
     83 
     84 
     85 PyDoc_STRVAR(splitfields__doc__,
     86 "split(s [,sep [,maxsplit]]) -> list of strings\n"
     87 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
     88 "\n"
     89 "Return a list of the words in the string s, using sep as the\n"
     90 "delimiter string.  If maxsplit is nonzero, splits into at most\n"
     91 "maxsplit words.  If sep is not specified, any whitespace string\n"
     92 "is a separator.  Maxsplit defaults to 0.\n"
     93 "\n"
     94 "(split and splitfields are synonymous)");
     95 
     96 static PyObject *
     97 strop_splitfields(PyObject *self, PyObject *args)
     98 {
     99     Py_ssize_t len, n, i, j, err;
    100     Py_ssize_t splitcount, maxsplit;
    101     char *s, *sub;
    102     PyObject *list, *item;
    103 
    104     WARN;
    105     sub = NULL;
    106     n = 0;
    107     splitcount = 0;
    108     maxsplit = 0;
    109     if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
    110         return NULL;
    111     if (sub == NULL)
    112         return split_whitespace(s, len, maxsplit);
    113     if (n == 0) {
    114         PyErr_SetString(PyExc_ValueError, "empty separator");
    115         return NULL;
    116     }
    117 
    118     list = PyList_New(0);
    119     if (list == NULL)
    120         return NULL;
    121 
    122     i = j = 0;
    123     while (i+n <= len) {
    124         if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
    125             item = PyString_FromStringAndSize(s+j, i-j);
    126             if (item == NULL)
    127                 goto fail;
    128             err = PyList_Append(list, item);
    129             Py_DECREF(item);
    130             if (err < 0)
    131                 goto fail;
    132             i = j = i + n;
    133             splitcount++;
    134             if (maxsplit && (splitcount >= maxsplit))
    135                 break;
    136         }
    137         else
    138             i++;
    139     }
    140     item = PyString_FromStringAndSize(s+j, len-j);
    141     if (item == NULL)
    142         goto fail;
    143     err = PyList_Append(list, item);
    144     Py_DECREF(item);
    145     if (err < 0)
    146         goto fail;
    147 
    148     return list;
    149 
    150  fail:
    151     Py_DECREF(list);
    152     return NULL;
    153 }
    154 
    155 
    156 PyDoc_STRVAR(joinfields__doc__,
    157 "join(list [,sep]) -> string\n"
    158 "joinfields(list [,sep]) -> string\n"
    159 "\n"
    160 "Return a string composed of the words in list, with\n"
    161 "intervening occurrences of sep.  Sep defaults to a single\n"
    162 "space.\n"
    163 "\n"
    164 "(join and joinfields are synonymous)");
    165 
    166 static PyObject *
    167 strop_joinfields(PyObject *self, PyObject *args)
    168 {
    169     PyObject *seq;
    170     char *sep = NULL;
    171     Py_ssize_t seqlen, seplen = 0;
    172     Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
    173     PyObject *res = NULL;
    174     char* p = NULL;
    175     ssizeargfunc getitemfunc;
    176 
    177     WARN;
    178     if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
    179         return NULL;
    180     if (sep == NULL) {
    181         sep = " ";
    182         seplen = 1;
    183     }
    184 
    185     seqlen = PySequence_Size(seq);
    186     if (seqlen < 0 && PyErr_Occurred())
    187         return NULL;
    188 
    189     if (seqlen == 1) {
    190         /* Optimization if there's only one item */
    191         PyObject *item = PySequence_GetItem(seq, 0);
    192         if (item && !PyString_Check(item)) {
    193             PyErr_SetString(PyExc_TypeError,
    194                      "first argument must be sequence of strings");
    195             Py_DECREF(item);
    196             return NULL;
    197         }
    198         return item;
    199     }
    200 
    201     if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
    202         return NULL;
    203     p = PyString_AsString(res);
    204 
    205     /* optimize for lists, since it's the most common case.  all others
    206      * (tuples and arbitrary sequences) just use the sequence abstract
    207      * interface.
    208      */
    209     if (PyList_Check(seq)) {
    210         for (i = 0; i < seqlen; i++) {
    211             PyObject *item = PyList_GET_ITEM(seq, i);
    212             if (!PyString_Check(item)) {
    213                 PyErr_SetString(PyExc_TypeError,
    214                 "first argument must be sequence of strings");
    215                 Py_DECREF(res);
    216                 return NULL;
    217             }
    218             slen = PyString_GET_SIZE(item);
    219             if (slen > PY_SSIZE_T_MAX - reslen ||
    220                 seplen > PY_SSIZE_T_MAX - reslen - seplen) {
    221                 PyErr_SetString(PyExc_OverflowError,
    222                                 "input too long");
    223                 Py_DECREF(res);
    224                 return NULL;
    225             }
    226             while (reslen + slen + seplen >= sz) {
    227                 if (_PyString_Resize(&res, sz * 2) < 0)
    228                     return NULL;
    229                 sz *= 2;
    230                 p = PyString_AsString(res) + reslen;
    231             }
    232             if (i > 0) {
    233                 memcpy(p, sep, seplen);
    234                 p += seplen;
    235                 reslen += seplen;
    236             }
    237             memcpy(p, PyString_AS_STRING(item), slen);
    238             p += slen;
    239             reslen += slen;
    240         }
    241         _PyString_Resize(&res, reslen);
    242         return res;
    243     }
    244 
    245     if (seq->ob_type->tp_as_sequence == NULL ||
    246              (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
    247     {
    248         PyErr_SetString(PyExc_TypeError,
    249                         "first argument must be a sequence");
    250         return NULL;
    251     }
    252     /* This is now type safe */
    253     for (i = 0; i < seqlen; i++) {
    254         PyObject *item = getitemfunc(seq, i);
    255         if (!item || !PyString_Check(item)) {
    256             PyErr_SetString(PyExc_TypeError,
    257                      "first argument must be sequence of strings");
    258             Py_DECREF(res);
    259             Py_XDECREF(item);
    260             return NULL;
    261         }
    262         slen = PyString_GET_SIZE(item);
    263         if (slen > PY_SSIZE_T_MAX - reslen ||
    264             seplen > PY_SSIZE_T_MAX - reslen - seplen) {
    265             PyErr_SetString(PyExc_OverflowError,
    266                             "input too long");
    267             Py_DECREF(res);
    268             Py_XDECREF(item);
    269             return NULL;
    270         }
    271         while (reslen + slen + seplen >= sz) {
    272             if (_PyString_Resize(&res, sz * 2) < 0) {
    273                 Py_DECREF(item);
    274                 return NULL;
    275             }
    276             sz *= 2;
    277             p = PyString_AsString(res) + reslen;
    278         }
    279         if (i > 0) {
    280             memcpy(p, sep, seplen);
    281             p += seplen;
    282             reslen += seplen;
    283         }
    284         memcpy(p, PyString_AS_STRING(item), slen);
    285         p += slen;
    286         reslen += slen;
    287         Py_DECREF(item);
    288     }
    289     _PyString_Resize(&res, reslen);
    290     return res;
    291 }
    292 
    293 
    294 PyDoc_STRVAR(find__doc__,
    295 "find(s, sub [,start [,end]]) -> in\n"
    296 "\n"
    297 "Return the lowest index in s where substring sub is found,\n"
    298 "such that sub is contained within s[start,end].  Optional\n"
    299 "arguments start and end are interpreted as in slice notation.\n"
    300 "\n"
    301 "Return -1 on failure.");
    302 
    303 static PyObject *
    304 strop_find(PyObject *self, PyObject *args)
    305 {
    306     char *s, *sub;
    307     Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
    308 
    309     WARN;
    310     if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
    311         return NULL;
    312 
    313     if (last > len)
    314         last = len;
    315     if (last < 0)
    316         last += len;
    317     if (last < 0)
    318         last = 0;
    319     if (i < 0)
    320         i += len;
    321     if (i < 0)
    322         i = 0;
    323 
    324     if (n == 0 && i <= last)
    325         return PyInt_FromLong((long)i);
    326 
    327     last -= n;
    328     for (; i <= last; ++i)
    329         if (s[i] == sub[0] &&
    330             (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
    331             return PyInt_FromLong((long)i);
    332 
    333     return PyInt_FromLong(-1L);
    334 }
    335 
    336 
    337 PyDoc_STRVAR(rfind__doc__,
    338 "rfind(s, sub [,start [,end]]) -> int\n"
    339 "\n"
    340 "Return the highest index in s where substring sub is found,\n"
    341 "such that sub is contained within s[start,end].  Optional\n"
    342 "arguments start and end are interpreted as in slice notation.\n"
    343 "\n"
    344 "Return -1 on failure.");
    345 
    346 static PyObject *
    347 strop_rfind(PyObject *self, PyObject *args)
    348 {
    349     char *s, *sub;
    350     Py_ssize_t len, n, j;
    351     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
    352 
    353     WARN;
    354     if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
    355         return NULL;
    356 
    357     if (last > len)
    358         last = len;
    359     if (last < 0)
    360         last += len;
    361     if (last < 0)
    362         last = 0;
    363     if (i < 0)
    364         i += len;
    365     if (i < 0)
    366         i = 0;
    367 
    368     if (n == 0 && i <= last)
    369         return PyInt_FromLong((long)last);
    370 
    371     for (j = last-n; j >= i; --j)
    372         if (s[j] == sub[0] &&
    373             (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
    374             return PyInt_FromLong((long)j);
    375 
    376     return PyInt_FromLong(-1L);
    377 }
    378 
    379 
    380 static PyObject *
    381 do_strip(PyObject *args, int striptype)
    382 {
    383     char *s;
    384     Py_ssize_t len, i, j;
    385 
    386 
    387     if (PyString_AsStringAndSize(args, &s, &len))
    388         return NULL;
    389 
    390     i = 0;
    391     if (striptype != RIGHTSTRIP) {
    392         while (i < len && isspace(Py_CHARMASK(s[i]))) {
    393             i++;
    394         }
    395     }
    396 
    397     j = len;
    398     if (striptype != LEFTSTRIP) {
    399         do {
    400             j--;
    401         } while (j >= i && isspace(Py_CHARMASK(s[j])));
    402         j++;
    403     }
    404 
    405     if (i == 0 && j == len) {
    406         Py_INCREF(args);
    407         return args;
    408     }
    409     else
    410         return PyString_FromStringAndSize(s+i, j-i);
    411 }
    412 
    413 
    414 PyDoc_STRVAR(strip__doc__,
    415 "strip(s) -> string\n"
    416 "\n"
    417 "Return a copy of the string s with leading and trailing\n"
    418 "whitespace removed.");
    419 
    420 static PyObject *
    421 strop_strip(PyObject *self, PyObject *args)
    422 {
    423     WARN;
    424     return do_strip(args, BOTHSTRIP);
    425 }
    426 
    427 
    428 PyDoc_STRVAR(lstrip__doc__,
    429 "lstrip(s) -> string\n"
    430 "\n"
    431 "Return a copy of the string s with leading whitespace removed.");
    432 
    433 static PyObject *
    434 strop_lstrip(PyObject *self, PyObject *args)
    435 {
    436     WARN;
    437     return do_strip(args, LEFTSTRIP);
    438 }
    439 
    440 
    441 PyDoc_STRVAR(rstrip__doc__,
    442 "rstrip(s) -> string\n"
    443 "\n"
    444 "Return a copy of the string s with trailing whitespace removed.");
    445 
    446 static PyObject *
    447 strop_rstrip(PyObject *self, PyObject *args)
    448 {
    449     WARN;
    450     return do_strip(args, RIGHTSTRIP);
    451 }
    452 
    453 
    454 PyDoc_STRVAR(lower__doc__,
    455 "lower(s) -> string\n"
    456 "\n"
    457 "Return a copy of the string s converted to lowercase.");
    458 
    459 static PyObject *
    460 strop_lower(PyObject *self, PyObject *args)
    461 {
    462     char *s, *s_new;
    463     Py_ssize_t i, n;
    464     PyObject *newstr;
    465     int changed;
    466 
    467     WARN;
    468     if (PyString_AsStringAndSize(args, &s, &n))
    469         return NULL;
    470     newstr = PyString_FromStringAndSize(NULL, n);
    471     if (newstr == NULL)
    472         return NULL;
    473     s_new = PyString_AsString(newstr);
    474     changed = 0;
    475     for (i = 0; i < n; i++) {
    476         int c = Py_CHARMASK(*s++);
    477         if (isupper(c)) {
    478             changed = 1;
    479             *s_new = tolower(c);
    480         } else
    481             *s_new = c;
    482         s_new++;
    483     }
    484     if (!changed) {
    485         Py_DECREF(newstr);
    486         Py_INCREF(args);
    487         return args;
    488     }
    489     return newstr;
    490 }
    491 
    492 
    493 PyDoc_STRVAR(upper__doc__,
    494 "upper(s) -> string\n"
    495 "\n"
    496 "Return a copy of the string s converted to uppercase.");
    497 
    498 static PyObject *
    499 strop_upper(PyObject *self, PyObject *args)
    500 {
    501     char *s, *s_new;
    502     Py_ssize_t i, n;
    503     PyObject *newstr;
    504     int changed;
    505 
    506     WARN;
    507     if (PyString_AsStringAndSize(args, &s, &n))
    508         return NULL;
    509     newstr = PyString_FromStringAndSize(NULL, n);
    510     if (newstr == NULL)
    511         return NULL;
    512     s_new = PyString_AsString(newstr);
    513     changed = 0;
    514     for (i = 0; i < n; i++) {
    515         int c = Py_CHARMASK(*s++);
    516         if (islower(c)) {
    517             changed = 1;
    518             *s_new = toupper(c);
    519         } else
    520             *s_new = c;
    521         s_new++;
    522     }
    523     if (!changed) {
    524         Py_DECREF(newstr);
    525         Py_INCREF(args);
    526         return args;
    527     }
    528     return newstr;
    529 }
    530 
    531 
    532 PyDoc_STRVAR(capitalize__doc__,
    533 "capitalize(s) -> string\n"
    534 "\n"
    535 "Return a copy of the string s with only its first character\n"
    536 "capitalized.");
    537 
    538 static PyObject *
    539 strop_capitalize(PyObject *self, PyObject *args)
    540 {
    541     char *s, *s_new;
    542     Py_ssize_t i, n;
    543     PyObject *newstr;
    544     int changed;
    545 
    546     WARN;
    547     if (PyString_AsStringAndSize(args, &s, &n))
    548         return NULL;
    549     newstr = PyString_FromStringAndSize(NULL, n);
    550     if (newstr == NULL)
    551         return NULL;
    552     s_new = PyString_AsString(newstr);
    553     changed = 0;
    554     if (0 < n) {
    555         int c = Py_CHARMASK(*s++);
    556         if (islower(c)) {
    557             changed = 1;
    558             *s_new = toupper(c);
    559         } else
    560             *s_new = c;
    561         s_new++;
    562     }
    563     for (i = 1; i < n; i++) {
    564         int c = Py_CHARMASK(*s++);
    565         if (isupper(c)) {
    566             changed = 1;
    567             *s_new = tolower(c);
    568         } else
    569             *s_new = c;
    570         s_new++;
    571     }
    572     if (!changed) {
    573         Py_DECREF(newstr);
    574         Py_INCREF(args);
    575         return args;
    576     }
    577     return newstr;
    578 }
    579 
    580 
    581 PyDoc_STRVAR(expandtabs__doc__,
    582 "expandtabs(string, [tabsize]) -> string\n"
    583 "\n"
    584 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
    585 "depending on the current column and the given tab size (default 8).\n"
    586 "The column number is reset to zero after each newline occurring in the\n"
    587 "string.  This doesn't understand other non-printing characters.");
    588 
    589 static PyObject *
    590 strop_expandtabs(PyObject *self, PyObject *args)
    591 {
    592     /* Original by Fredrik Lundh */
    593     char* e;
    594     char* p;
    595     char* q;
    596     Py_ssize_t i, j;
    597     PyObject* out;
    598     char* string;
    599     Py_ssize_t stringlen;
    600     int tabsize = 8;
    601 
    602     WARN;
    603     /* Get arguments */
    604     if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
    605         return NULL;
    606     if (tabsize < 1) {
    607         PyErr_SetString(PyExc_ValueError,
    608                         "tabsize must be at least 1");
    609         return NULL;
    610     }
    611 
    612     /* First pass: determine size of output string */
    613     i = j = 0; /* j: current column; i: total of previous lines */
    614     e = string + stringlen;
    615     for (p = string; p < e; p++) {
    616         if (*p == '\t') {
    617             Py_ssize_t incr = tabsize - (j%tabsize);
    618             if (j > PY_SSIZE_T_MAX - incr)
    619                 goto overflow;
    620             j += incr;
    621         } else {
    622             if (j > PY_SSIZE_T_MAX - 1)
    623                 goto overflow;
    624             j++;
    625             if (*p == '\n') {
    626                 if (i > PY_SSIZE_T_MAX - j)
    627                     goto overflow;
    628                 i += j;
    629                 j = 0;
    630             }
    631         }
    632     }
    633 
    634     if (i > PY_SSIZE_T_MAX - j)
    635         goto overflow;
    636 
    637     /* Second pass: create output string and fill it */
    638     out = PyString_FromStringAndSize(NULL, i+j);
    639     if (out == NULL)
    640         return NULL;
    641 
    642     i = 0;
    643     q = PyString_AS_STRING(out);
    644 
    645     for (p = string; p < e; p++) {
    646         if (*p == '\t') {
    647             j = tabsize - (i%tabsize);
    648             i += j;
    649             while (j-- > 0)
    650                 *q++ = ' ';
    651         } else {
    652             *q++ = *p;
    653             i++;
    654             if (*p == '\n')
    655                 i = 0;
    656         }
    657     }
    658 
    659     return out;
    660   overflow:
    661     PyErr_SetString(PyExc_OverflowError, "result is too long");
    662     return NULL;
    663 }
    664 
    665 
    666 PyDoc_STRVAR(count__doc__,
    667 "count(s, sub[, start[, end]]) -> int\n"
    668 "\n"
    669 "Return the number of occurrences of substring sub in string\n"
    670 "s[start:end].  Optional arguments start and end are\n"
    671 "interpreted as in slice notation.");
    672 
    673 static PyObject *
    674 strop_count(PyObject *self, PyObject *args)
    675 {
    676     char *s, *sub;
    677     Py_ssize_t len, n;
    678     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
    679     Py_ssize_t m, r;
    680 
    681     WARN;
    682     if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
    683         return NULL;
    684     if (last > len)
    685         last = len;
    686     if (last < 0)
    687         last += len;
    688     if (last < 0)
    689         last = 0;
    690     if (i < 0)
    691         i += len;
    692     if (i < 0)
    693         i = 0;
    694     m = last + 1 - n;
    695     if (n == 0)
    696         return PyInt_FromLong((long) (m-i));
    697 
    698     r = 0;
    699     while (i < m) {
    700         if (!memcmp(s+i, sub, n)) {
    701             r++;
    702             i += n;
    703         } else {
    704             i++;
    705         }
    706     }
    707     return PyInt_FromLong((long) r);
    708 }
    709 
    710 
    711 PyDoc_STRVAR(swapcase__doc__,
    712 "swapcase(s) -> string\n"
    713 "\n"
    714 "Return a copy of the string s with upper case characters\n"
    715 "converted to lowercase and vice versa.");
    716 
    717 static PyObject *
    718 strop_swapcase(PyObject *self, PyObject *args)
    719 {
    720     char *s, *s_new;
    721     Py_ssize_t i, n;
    722     PyObject *newstr;
    723     int changed;
    724 
    725     WARN;
    726     if (PyString_AsStringAndSize(args, &s, &n))
    727         return NULL;
    728     newstr = PyString_FromStringAndSize(NULL, n);
    729     if (newstr == NULL)
    730         return NULL;
    731     s_new = PyString_AsString(newstr);
    732     changed = 0;
    733     for (i = 0; i < n; i++) {
    734         int c = Py_CHARMASK(*s++);
    735         if (islower(c)) {
    736             changed = 1;
    737             *s_new = toupper(c);
    738         }
    739         else if (isupper(c)) {
    740             changed = 1;
    741             *s_new = tolower(c);
    742         }
    743         else
    744             *s_new = c;
    745         s_new++;
    746     }
    747     if (!changed) {
    748         Py_DECREF(newstr);
    749         Py_INCREF(args);
    750         return args;
    751     }
    752     return newstr;
    753 }
    754 
    755 
    756 PyDoc_STRVAR(atoi__doc__,
    757 "atoi(s [,base]) -> int\n"
    758 "\n"
    759 "Return the integer represented by the string s in the given\n"
    760 "base, which defaults to 10.  The string s must consist of one\n"
    761 "or more digits, possibly preceded by a sign.  If base is 0, it\n"
    762 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
    763 "0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
    764 "accepted.");
    765 
    766 static PyObject *
    767 strop_atoi(PyObject *self, PyObject *args)
    768 {
    769     char *s, *end;
    770     int base = 10;
    771     long x;
    772     char buffer[256]; /* For errors */
    773 
    774     WARN;
    775     if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
    776         return NULL;
    777 
    778     if ((base != 0 && base < 2) || base > 36) {
    779         PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
    780         return NULL;
    781     }
    782 
    783     while (*s && isspace(Py_CHARMASK(*s)))
    784         s++;
    785     errno = 0;
    786     if (base == 0 && s[0] == '0')
    787         x = (long) PyOS_strtoul(s, &end, base);
    788     else
    789         x = PyOS_strtol(s, &end, base);
    790     if (end == s || !isalnum(Py_CHARMASK(end[-1])))
    791         goto bad;
    792     while (*end && isspace(Py_CHARMASK(*end)))
    793         end++;
    794     if (*end != '\0') {
    795   bad:
    796         PyOS_snprintf(buffer, sizeof(buffer),
    797                       "invalid literal for atoi(): %.200s", s);
    798         PyErr_SetString(PyExc_ValueError, buffer);
    799         return NULL;
    800     }
    801     else if (errno != 0) {
    802         PyOS_snprintf(buffer, sizeof(buffer),
    803                       "atoi() literal too large: %.200s", s);
    804         PyErr_SetString(PyExc_ValueError, buffer);
    805         return NULL;
    806     }
    807     return PyInt_FromLong(x);
    808 }
    809 
    810 
    811 PyDoc_STRVAR(atol__doc__,
    812 "atol(s [,base]) -> long\n"
    813 "\n"
    814 "Return the long integer represented by the string s in the\n"
    815 "given base, which defaults to 10.  The string s must consist\n"
    816 "of one or more digits, possibly preceded by a sign.  If base\n"
    817 "is 0, it is chosen from the leading characters of s, 0 for\n"
    818 "octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
    819 "0x or 0X is accepted.  A trailing L or l is not accepted,\n"
    820 "unless base is 0.");
    821 
    822 static PyObject *
    823 strop_atol(PyObject *self, PyObject *args)
    824 {
    825     char *s, *end;
    826     int base = 10;
    827     PyObject *x;
    828     char buffer[256]; /* For errors */
    829 
    830     WARN;
    831     if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
    832         return NULL;
    833 
    834     if ((base != 0 && base < 2) || base > 36) {
    835         PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
    836         return NULL;
    837     }
    838 
    839     while (*s && isspace(Py_CHARMASK(*s)))
    840         s++;
    841     if (s[0] == '\0') {
    842         PyErr_SetString(PyExc_ValueError, "empty string for atol()");
    843         return NULL;
    844     }
    845     x = PyLong_FromString(s, &end, base);
    846     if (x == NULL)
    847         return NULL;
    848     if (base == 0 && (*end == 'l' || *end == 'L'))
    849         end++;
    850     while (*end && isspace(Py_CHARMASK(*end)))
    851         end++;
    852     if (*end != '\0') {
    853         PyOS_snprintf(buffer, sizeof(buffer),
    854                       "invalid literal for atol(): %.200s", s);
    855         PyErr_SetString(PyExc_ValueError, buffer);
    856         Py_DECREF(x);
    857         return NULL;
    858     }
    859     return x;
    860 }
    861 
    862 
    863 PyDoc_STRVAR(atof__doc__,
    864 "atof(s) -> float\n"
    865 "\n"
    866 "Return the floating point number represented by the string s.");
    867 
    868 static PyObject *
    869 strop_atof(PyObject *self, PyObject *args)
    870 {
    871     char *s, *end;
    872     double x;
    873     char buffer[256]; /* For errors */
    874 
    875     WARN;
    876     if (!PyArg_ParseTuple(args, "s:atof", &s))
    877         return NULL;
    878     while (*s && isspace(Py_CHARMASK(*s)))
    879         s++;
    880     if (s[0] == '\0') {
    881         PyErr_SetString(PyExc_ValueError, "empty string for atof()");
    882         return NULL;
    883     }
    884 
    885     PyFPE_START_PROTECT("strop_atof", return 0)
    886     x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
    887     PyFPE_END_PROTECT(x)
    888     if (x == -1 && PyErr_Occurred())
    889         return NULL;
    890     while (*end && isspace(Py_CHARMASK(*end)))
    891         end++;
    892     if (*end != '\0') {
    893         PyOS_snprintf(buffer, sizeof(buffer),
    894                       "invalid literal for atof(): %.200s", s);
    895         PyErr_SetString(PyExc_ValueError, buffer);
    896         return NULL;
    897     }
    898     return PyFloat_FromDouble(x);
    899 }
    900 
    901 
    902 PyDoc_STRVAR(maketrans__doc__,
    903 "maketrans(frm, to) -> string\n"
    904 "\n"
    905 "Return a translation table (a string of 256 bytes long)\n"
    906 "suitable for use in string.translate.  The strings frm and to\n"
    907 "must be of the same length.");
    908 
    909 static PyObject *
    910 strop_maketrans(PyObject *self, PyObject *args)
    911 {
    912     unsigned char *c, *from=NULL, *to=NULL;
    913     Py_ssize_t i, fromlen=0, tolen=0;
    914     PyObject *result;
    915 
    916     if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
    917         return NULL;
    918 
    919     if (fromlen != tolen) {
    920         PyErr_SetString(PyExc_ValueError,
    921                         "maketrans arguments must have same length");
    922         return NULL;
    923     }
    924 
    925     result = PyString_FromStringAndSize((char *)NULL, 256);
    926     if (result == NULL)
    927         return NULL;
    928     c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
    929     for (i = 0; i < 256; i++)
    930         c[i]=(unsigned char)i;
    931     for (i = 0; i < fromlen; i++)
    932         c[from[i]]=to[i];
    933 
    934     return result;
    935 }
    936 
    937 
    938 PyDoc_STRVAR(translate__doc__,
    939 "translate(s,table [,deletechars]) -> string\n"
    940 "\n"
    941 "Return a copy of the string s, where all characters occurring\n"
    942 "in the optional argument deletechars are removed, and the\n"
    943 "remaining characters have been mapped through the given\n"
    944 "translation table, which must be a string of length 256.");
    945 
    946 static PyObject *
    947 strop_translate(PyObject *self, PyObject *args)
    948 {
    949     register char *input, *table, *output;
    950     Py_ssize_t i;
    951     int c, changed = 0;
    952     PyObject *input_obj;
    953     char *table1, *output_start, *del_table=NULL;
    954     Py_ssize_t inlen, tablen, dellen = 0;
    955     PyObject *result;
    956     int trans_table[256];
    957 
    958     WARN;
    959     if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
    960                           &table1, &tablen, &del_table, &dellen))
    961         return NULL;
    962     if (tablen != 256) {
    963         PyErr_SetString(PyExc_ValueError,
    964                       "translation table must be 256 characters long");
    965         return NULL;
    966     }
    967 
    968     table = table1;
    969     inlen = PyString_GET_SIZE(input_obj);
    970     result = PyString_FromStringAndSize((char *)NULL, inlen);
    971     if (result == NULL)
    972         return NULL;
    973     output_start = output = PyString_AsString(result);
    974     input = PyString_AsString(input_obj);
    975 
    976     if (dellen == 0) {
    977         /* If no deletions are required, use faster code */
    978         for (i = inlen; --i >= 0; ) {
    979             c = Py_CHARMASK(*input++);
    980             if (Py_CHARMASK((*output++ = table[c])) != c)
    981                 changed = 1;
    982         }
    983         if (changed)
    984             return result;
    985         Py_DECREF(result);
    986         Py_INCREF(input_obj);
    987         return input_obj;
    988     }
    989 
    990     for (i = 0; i < 256; i++)
    991         trans_table[i] = Py_CHARMASK(table[i]);
    992 
    993     for (i = 0; i < dellen; i++)
    994         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
    995 
    996     for (i = inlen; --i >= 0; ) {
    997         c = Py_CHARMASK(*input++);
    998         if (trans_table[c] != -1)
    999             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
   1000                 continue;
   1001         changed = 1;
   1002     }
   1003     if (!changed) {
   1004         Py_DECREF(result);
   1005         Py_INCREF(input_obj);
   1006         return input_obj;
   1007     }
   1008     /* Fix the size of the resulting string */
   1009     if (inlen > 0)
   1010         _PyString_Resize(&result, output - output_start);
   1011     return result;
   1012 }
   1013 
   1014 
   1015 /* What follows is used for implementing replace().  Perry Stoll. */
   1016 
   1017 /*
   1018   mymemfind
   1019 
   1020   strstr replacement for arbitrary blocks of memory.
   1021 
   1022   Locates the first occurrence in the memory pointed to by MEM of the
   1023   contents of memory pointed to by PAT.  Returns the index into MEM if
   1024   found, or -1 if not found.  If len of PAT is greater than length of
   1025   MEM, the function returns -1.
   1026 */
   1027 static Py_ssize_t
   1028 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
   1029 {
   1030     register Py_ssize_t ii;
   1031 
   1032     /* pattern can not occur in the last pat_len-1 chars */
   1033     len -= pat_len;
   1034 
   1035     for (ii = 0; ii <= len; ii++) {
   1036         if (mem[ii] == pat[0] &&
   1037             (pat_len == 1 ||
   1038              memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
   1039             return ii;
   1040         }
   1041     }
   1042     return -1;
   1043 }
   1044 
   1045 /*
   1046   mymemcnt
   1047 
   1048    Return the number of distinct times PAT is found in MEM.
   1049    meaning mem=1111 and pat==11 returns 2.
   1050        mem=11111 and pat==11 also return 2.
   1051  */
   1052 static Py_ssize_t
   1053 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
   1054 {
   1055     register Py_ssize_t offset = 0;
   1056     Py_ssize_t nfound = 0;
   1057 
   1058     while (len >= 0) {
   1059         offset = mymemfind(mem, len, pat, pat_len);
   1060         if (offset == -1)
   1061             break;
   1062         mem += offset + pat_len;
   1063         len -= offset + pat_len;
   1064         nfound++;
   1065     }
   1066     return nfound;
   1067 }
   1068 
   1069 /*
   1070    mymemreplace
   1071 
   1072    Return a string in which all occurrences of PAT in memory STR are
   1073    replaced with SUB.
   1074 
   1075    If length of PAT is less than length of STR or there are no occurrences
   1076    of PAT in STR, then the original string is returned. Otherwise, a new
   1077    string is allocated here and returned.
   1078 
   1079    on return, out_len is:
   1080        the length of output string, or
   1081        -1 if the input string is returned, or
   1082        unchanged if an error occurs (no memory).
   1083 
   1084    return value is:
   1085        the new string allocated locally, or
   1086        NULL if an error occurred.
   1087 */
   1088 static char *
   1089 mymemreplace(const char *str, Py_ssize_t len,           /* input string */
   1090          const char *pat, Py_ssize_t pat_len,           /* pattern string to find */
   1091          const char *sub, Py_ssize_t sub_len,           /* substitution string */
   1092          Py_ssize_t count,                              /* number of replacements */
   1093          Py_ssize_t *out_len)
   1094 {
   1095     char *out_s;
   1096     char *new_s;
   1097     Py_ssize_t nfound, offset, new_len, delta_len, abs_delta;
   1098 
   1099     if (len == 0 || pat_len > len)
   1100         goto return_same;
   1101 
   1102     /* find length of output string */
   1103     nfound = mymemcnt(str, len, pat, pat_len);
   1104     if (count < 0)
   1105         count = PY_SSIZE_T_MAX;
   1106     else if (nfound > count)
   1107         nfound = count;
   1108     if (nfound == 0)
   1109         goto return_same;
   1110 
   1111     delta_len = sub_len - pat_len;
   1112     abs_delta = (delta_len < 0) ? -delta_len : delta_len;
   1113     if (PY_SSIZE_T_MAX/nfound < abs_delta)
   1114         return NULL;
   1115     delta_len *= nfound;
   1116     if (PY_SSIZE_T_MAX - len < delta_len)
   1117         return NULL;
   1118     new_len = len + delta_len;
   1119     if (new_len == 0) {
   1120         /* Have to allocate something for the caller to free(). */
   1121         out_s = (char *)PyMem_MALLOC(1);
   1122         if (out_s == NULL)
   1123             return NULL;
   1124         out_s[0] = '\0';
   1125     }
   1126     else {
   1127         assert(new_len > 0);
   1128         new_s = (char *)PyMem_MALLOC(new_len);
   1129         if (new_s == NULL)
   1130             return NULL;
   1131         out_s = new_s;
   1132 
   1133         for (; count > 0 && len > 0; --count) {
   1134             /* find index of next instance of pattern */
   1135             offset = mymemfind(str, len, pat, pat_len);
   1136             if (offset == -1)
   1137                 break;
   1138 
   1139             /* copy non matching part of input string */
   1140             memcpy(new_s, str, offset);
   1141             str += offset + pat_len;
   1142             len -= offset + pat_len;
   1143 
   1144             /* copy substitute into the output string */
   1145             new_s += offset;
   1146             memcpy(new_s, sub, sub_len);
   1147             new_s += sub_len;
   1148         }
   1149         /* copy any remaining values into output string */
   1150         if (len > 0)
   1151             memcpy(new_s, str, len);
   1152     }
   1153     *out_len = new_len;
   1154     return out_s;
   1155 
   1156   return_same:
   1157     *out_len = -1;
   1158     return (char *)str; /* cast away const */
   1159 }
   1160 
   1161 
   1162 PyDoc_STRVAR(replace__doc__,
   1163 "replace (str, old, new[, maxsplit]) -> string\n"
   1164 "\n"
   1165 "Return a copy of string str with all occurrences of substring\n"
   1166 "old replaced by new. If the optional argument maxsplit is\n"
   1167 "given, only the first maxsplit occurrences are replaced.");
   1168 
   1169 static PyObject *
   1170 strop_replace(PyObject *self, PyObject *args)
   1171 {
   1172     char *str, *pat,*sub,*new_s;
   1173     Py_ssize_t len,pat_len,sub_len,out_len;
   1174     Py_ssize_t count = -1;
   1175     PyObject *newstr;
   1176 
   1177     WARN;
   1178     if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
   1179                           &str, &len, &pat, &pat_len, &sub, &sub_len,
   1180                           &count))
   1181         return NULL;
   1182     if (pat_len <= 0) {
   1183         PyErr_SetString(PyExc_ValueError, "empty pattern string");
   1184         return NULL;
   1185     }
   1186     /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
   1187      * current (2.1) string.py and string methods.  Preserve this for
   1188      * ... well, hard to say for what <wink>.
   1189      */
   1190     if (count == 0)
   1191         count = -1;
   1192     new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
   1193     if (new_s == NULL) {
   1194         PyErr_NoMemory();
   1195         return NULL;
   1196     }
   1197     if (out_len == -1) {
   1198         /* we're returning another reference to the input string */
   1199         newstr = PyTuple_GetItem(args, 0);
   1200         Py_XINCREF(newstr);
   1201     }
   1202     else {
   1203         newstr = PyString_FromStringAndSize(new_s, out_len);
   1204         PyMem_FREE(new_s);
   1205     }
   1206     return newstr;
   1207 }
   1208 
   1209 
   1210 /* List of functions defined in the module */
   1211 
/* Method table passed to Py_InitModule4() by initstrop().  Each entry is
   {name, C implementation, calling-convention flag, docstring}.
   "join"/"joinfields" and "split"/"splitfields" are alias pairs: both
   names of a pair point at the same C function and the same docstring. */
static PyMethodDef
strop_methods[] = {
    {"atof",            strop_atof,        METH_VARARGS, atof__doc__},
    {"atoi",            strop_atoi,        METH_VARARGS, atoi__doc__},
    {"atol",            strop_atol,        METH_VARARGS, atol__doc__},
    {"capitalize",      strop_capitalize,  METH_O,       capitalize__doc__},
    {"count",           strop_count,       METH_VARARGS, count__doc__},
    {"expandtabs",      strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
    {"find",            strop_find,        METH_VARARGS, find__doc__},
    {"join",            strop_joinfields,  METH_VARARGS, joinfields__doc__},
    {"joinfields",      strop_joinfields,  METH_VARARGS, joinfields__doc__},
    {"lstrip",          strop_lstrip,      METH_O,       lstrip__doc__},
    {"lower",           strop_lower,       METH_O,       lower__doc__},
    {"maketrans",       strop_maketrans,   METH_VARARGS, maketrans__doc__},
    {"replace",         strop_replace,     METH_VARARGS, replace__doc__},
    {"rfind",           strop_rfind,       METH_VARARGS, rfind__doc__},
    {"rstrip",          strop_rstrip,      METH_O,       rstrip__doc__},
    {"split",           strop_splitfields, METH_VARARGS, splitfields__doc__},
    {"splitfields",     strop_splitfields, METH_VARARGS, splitfields__doc__},
    {"strip",           strop_strip,       METH_O,       strip__doc__},
    {"swapcase",        strop_swapcase,    METH_O,       swapcase__doc__},
    {"translate",       strop_translate,   METH_VARARGS, translate__doc__},
    {"upper",           strop_upper,       METH_O,       upper__doc__},
    {NULL,              NULL}   /* sentinel */
};
   1237 
   1238 
   1239 PyMODINIT_FUNC
   1240 initstrop(void)
   1241 {
   1242     PyObject *m, *s;
   1243     char buf[256];
   1244     int c, n;
   1245     m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
   1246                        (PyObject*)NULL, PYTHON_API_VERSION);
   1247     if (m == NULL)
   1248         return;
   1249 
   1250     /* Create 'whitespace' object */
   1251     n = 0;
   1252     for (c = 0; c < 256; c++) {
   1253         if (isspace(c))
   1254             buf[n++] = c;
   1255     }
   1256     s = PyString_FromStringAndSize(buf, n);
   1257     if (s)
   1258         PyModule_AddObject(m, "whitespace", s);
   1259 
   1260     /* Create 'lowercase' object */
   1261     n = 0;
   1262     for (c = 0; c < 256; c++) {
   1263         if (islower(c))
   1264             buf[n++] = c;
   1265     }
   1266     s = PyString_FromStringAndSize(buf, n);
   1267     if (s)
   1268         PyModule_AddObject(m, "lowercase", s);
   1269 
   1270     /* Create 'uppercase' object */
   1271     n = 0;
   1272     for (c = 0; c < 256; c++) {
   1273         if (isupper(c))
   1274             buf[n++] = c;
   1275     }
   1276     s = PyString_FromStringAndSize(buf, n);
   1277     if (s)
   1278         PyModule_AddObject(m, "uppercase", s);
   1279 }
   1280