1 /* String (str/bytes) object implementation */ 2 3 #define PY_SSIZE_T_CLEAN 4 5 #include "Python.h" 6 #include <ctype.h> 7 #include <stddef.h> 8 9 #ifdef COUNT_ALLOCS 10 Py_ssize_t null_strings, one_strings; 11 #endif 12 13 static PyStringObject *characters[UCHAR_MAX + 1]; 14 static PyStringObject *nullstring; 15 16 /* This dictionary holds all interned strings. Note that references to 17 strings in this dictionary are *not* counted in the string's ob_refcnt. 18 When the interned string reaches a refcnt of 0 the string deallocation 19 function will delete the reference from this dictionary. 20 21 Another way to look at this is that to say that the actual reference 22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0) 23 */ 24 static PyObject *interned; 25 26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation 27 for a string of length n should request PyStringObject_SIZE + n bytes. 28 29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves 30 3 bytes per string allocation on a typical system. 31 */ 32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1) 33 34 /* 35 For PyString_FromString(), the parameter `str' points to a null-terminated 36 string containing exactly `size' bytes. 37 38 For PyString_FromStringAndSize(), the parameter the parameter `str' is 39 either NULL or else points to a string containing at least `size' bytes. 40 For PyString_FromStringAndSize(), the string in the `str' parameter does 41 not have to be null-terminated. (Therefore it is safe to construct a 42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.) 43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1' 44 bytes (setting the last byte to the null terminating character) and you can 45 fill in the data yourself. 
If `str' is non-NULL then the resulting 46 PyString object must be treated as immutable and you must not fill in nor 47 alter the data yourself, since the strings may be shared. 48 49 The PyObject member `op->ob_size', which denotes the number of "extra 50 items" in a variable-size object, will contain the number of bytes 51 allocated for string data, not counting the null terminating character. 52 It is therefore equal to the `size' parameter (for 53 PyString_FromStringAndSize()) or the length of the string in the `str' 54 parameter (for PyString_FromString()). 55 */ 56 PyObject * 57 PyString_FromStringAndSize(const char *str, Py_ssize_t size) 58 { 59 register PyStringObject *op; 60 if (size < 0) { 61 PyErr_SetString(PyExc_SystemError, 62 "Negative size passed to PyString_FromStringAndSize"); 63 return NULL; 64 } 65 if (size == 0 && (op = nullstring) != NULL) { 66 #ifdef COUNT_ALLOCS 67 null_strings++; 68 #endif 69 Py_INCREF(op); 70 return (PyObject *)op; 71 } 72 if (size == 1 && str != NULL && 73 (op = characters[*str & UCHAR_MAX]) != NULL) 74 { 75 #ifdef COUNT_ALLOCS 76 one_strings++; 77 #endif 78 Py_INCREF(op); 79 return (PyObject *)op; 80 } 81 82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { 83 PyErr_SetString(PyExc_OverflowError, "string is too large"); 84 return NULL; 85 } 86 87 /* Inline PyObject_NewVar */ 88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); 89 if (op == NULL) 90 return PyErr_NoMemory(); 91 PyObject_INIT_VAR(op, &PyString_Type, size); 92 op->ob_shash = -1; 93 op->ob_sstate = SSTATE_NOT_INTERNED; 94 if (str != NULL) 95 Py_MEMCPY(op->ob_sval, str, size); 96 op->ob_sval[size] = '\0'; 97 /* share short strings */ 98 if (size == 0) { 99 PyObject *t = (PyObject *)op; 100 PyString_InternInPlace(&t); 101 op = (PyStringObject *)t; 102 nullstring = op; 103 Py_INCREF(op); 104 } else if (size == 1 && str != NULL) { 105 PyObject *t = (PyObject *)op; 106 PyString_InternInPlace(&t); 107 op = (PyStringObject *)t; 108 characters[*str 
& UCHAR_MAX] = op; 109 Py_INCREF(op); 110 } 111 return (PyObject *) op; 112 } 113 114 PyObject * 115 PyString_FromString(const char *str) 116 { 117 register size_t size; 118 register PyStringObject *op; 119 120 assert(str != NULL); 121 size = strlen(str); 122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { 123 PyErr_SetString(PyExc_OverflowError, 124 "string is too long for a Python string"); 125 return NULL; 126 } 127 if (size == 0 && (op = nullstring) != NULL) { 128 #ifdef COUNT_ALLOCS 129 null_strings++; 130 #endif 131 Py_INCREF(op); 132 return (PyObject *)op; 133 } 134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { 135 #ifdef COUNT_ALLOCS 136 one_strings++; 137 #endif 138 Py_INCREF(op); 139 return (PyObject *)op; 140 } 141 142 /* Inline PyObject_NewVar */ 143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); 144 if (op == NULL) 145 return PyErr_NoMemory(); 146 PyObject_INIT_VAR(op, &PyString_Type, size); 147 op->ob_shash = -1; 148 op->ob_sstate = SSTATE_NOT_INTERNED; 149 Py_MEMCPY(op->ob_sval, str, size+1); 150 /* share short strings */ 151 if (size == 0) { 152 PyObject *t = (PyObject *)op; 153 PyString_InternInPlace(&t); 154 op = (PyStringObject *)t; 155 nullstring = op; 156 Py_INCREF(op); 157 } else if (size == 1) { 158 PyObject *t = (PyObject *)op; 159 PyString_InternInPlace(&t); 160 op = (PyStringObject *)t; 161 characters[*str & UCHAR_MAX] = op; 162 Py_INCREF(op); 163 } 164 return (PyObject *) op; 165 } 166 167 PyObject * 168 PyString_FromFormatV(const char *format, va_list vargs) 169 { 170 va_list count; 171 Py_ssize_t n = 0; 172 const char* f; 173 char *s; 174 PyObject* string; 175 176 #ifdef VA_LIST_IS_ARRAY 177 Py_MEMCPY(count, vargs, sizeof(va_list)); 178 #else 179 #ifdef __va_copy 180 __va_copy(count, vargs); 181 #else 182 count = vargs; 183 #endif 184 #endif 185 /* step 1: figure out how large a buffer we need */ 186 for (f = format; *f; f++) { 187 if (*f == '%') { 188 #ifdef HAVE_LONG_LONG 189 int longlongflag 
= 0; 190 #endif 191 const char* p = f; 192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) 193 ; 194 195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since 196 * they don't affect the amount of space we reserve. 197 */ 198 if (*f == 'l') { 199 if (f[1] == 'd' || f[1] == 'u') { 200 ++f; 201 } 202 #ifdef HAVE_LONG_LONG 203 else if (f[1] == 'l' && 204 (f[2] == 'd' || f[2] == 'u')) { 205 longlongflag = 1; 206 f += 2; 207 } 208 #endif 209 } 210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { 211 ++f; 212 } 213 214 switch (*f) { 215 case 'c': 216 (void)va_arg(count, int); 217 /* fall through... */ 218 case '%': 219 n++; 220 break; 221 case 'd': case 'u': case 'i': case 'x': 222 (void) va_arg(count, int); 223 #ifdef HAVE_LONG_LONG 224 /* Need at most 225 ceil(log10(256)*SIZEOF_LONG_LONG) digits, 226 plus 1 for the sign. 53/22 is an upper 227 bound for log10(256). */ 228 if (longlongflag) 229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22; 230 else 231 #endif 232 /* 20 bytes is enough to hold a 64-bit 233 integer. Decimal takes the most 234 space. This isn't enough for 235 octal. */ 236 n += 20; 237 238 break; 239 case 's': 240 s = va_arg(count, char*); 241 n += strlen(s); 242 break; 243 case 'p': 244 (void) va_arg(count, int); 245 /* maximum 64-bit pointer representation: 246 * 0xffffffffffffffff 247 * so 19 characters is enough. 248 * XXX I count 18 -- what's the extra for? 249 */ 250 n += 19; 251 break; 252 default: 253 /* if we stumble upon an unknown 254 formatting code, copy the rest of 255 the format string to the output 256 string. (we cannot just skip the 257 code, since there's no way to know 258 what's in the argument list) */ 259 n += strlen(p); 260 goto expand; 261 } 262 } else 263 n++; 264 } 265 expand: 266 /* step 2: fill the buffer */ 267 /* Since we've analyzed how much space we need for the worst case, 268 use sprintf directly instead of the slower PyOS_snprintf. 
*/ 269 string = PyString_FromStringAndSize(NULL, n); 270 if (!string) 271 return NULL; 272 273 s = PyString_AsString(string); 274 275 for (f = format; *f; f++) { 276 if (*f == '%') { 277 const char* p = f++; 278 Py_ssize_t i; 279 int longflag = 0; 280 #ifdef HAVE_LONG_LONG 281 int longlongflag = 0; 282 #endif 283 int size_tflag = 0; 284 /* parse the width.precision part (we're only 285 interested in the precision value, if any) */ 286 n = 0; 287 while (isdigit(Py_CHARMASK(*f))) 288 n = (n*10) + *f++ - '0'; 289 if (*f == '.') { 290 f++; 291 n = 0; 292 while (isdigit(Py_CHARMASK(*f))) 293 n = (n*10) + *f++ - '0'; 294 } 295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) 296 f++; 297 /* Handle %ld, %lu, %lld and %llu. */ 298 if (*f == 'l') { 299 if (f[1] == 'd' || f[1] == 'u') { 300 longflag = 1; 301 ++f; 302 } 303 #ifdef HAVE_LONG_LONG 304 else if (f[1] == 'l' && 305 (f[2] == 'd' || f[2] == 'u')) { 306 longlongflag = 1; 307 f += 2; 308 } 309 #endif 310 } 311 /* handle the size_t flag. 
*/ 312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { 313 size_tflag = 1; 314 ++f; 315 } 316 317 switch (*f) { 318 case 'c': 319 *s++ = va_arg(vargs, int); 320 break; 321 case 'd': 322 if (longflag) 323 sprintf(s, "%ld", va_arg(vargs, long)); 324 #ifdef HAVE_LONG_LONG 325 else if (longlongflag) 326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d", 327 va_arg(vargs, PY_LONG_LONG)); 328 #endif 329 else if (size_tflag) 330 sprintf(s, "%" PY_FORMAT_SIZE_T "d", 331 va_arg(vargs, Py_ssize_t)); 332 else 333 sprintf(s, "%d", va_arg(vargs, int)); 334 s += strlen(s); 335 break; 336 case 'u': 337 if (longflag) 338 sprintf(s, "%lu", 339 va_arg(vargs, unsigned long)); 340 #ifdef HAVE_LONG_LONG 341 else if (longlongflag) 342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u", 343 va_arg(vargs, PY_LONG_LONG)); 344 #endif 345 else if (size_tflag) 346 sprintf(s, "%" PY_FORMAT_SIZE_T "u", 347 va_arg(vargs, size_t)); 348 else 349 sprintf(s, "%u", 350 va_arg(vargs, unsigned int)); 351 s += strlen(s); 352 break; 353 case 'i': 354 sprintf(s, "%i", va_arg(vargs, int)); 355 s += strlen(s); 356 break; 357 case 'x': 358 sprintf(s, "%x", va_arg(vargs, int)); 359 s += strlen(s); 360 break; 361 case 's': 362 p = va_arg(vargs, char*); 363 i = strlen(p); 364 if (n > 0 && i > n) 365 i = n; 366 Py_MEMCPY(s, p, i); 367 s += i; 368 break; 369 case 'p': 370 sprintf(s, "%p", va_arg(vargs, void*)); 371 /* %p is ill-defined: ensure leading 0x. */ 372 if (s[1] == 'X') 373 s[1] = 'x'; 374 else if (s[1] != 'x') { 375 memmove(s+2, s, strlen(s)+1); 376 s[0] = '0'; 377 s[1] = 'x'; 378 } 379 s += strlen(s); 380 break; 381 case '%': 382 *s++ = '%'; 383 break; 384 default: 385 strcpy(s, p); 386 s += strlen(s); 387 goto end; 388 } 389 } else 390 *s++ = *f; 391 } 392 393 end: 394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string))) 395 return NULL; 396 return string; 397 } 398 399 PyObject * 400 PyString_FromFormat(const char *format, ...) 
401 { 402 PyObject* ret; 403 va_list vargs; 404 405 #ifdef HAVE_STDARG_PROTOTYPES 406 va_start(vargs, format); 407 #else 408 va_start(vargs); 409 #endif 410 ret = PyString_FromFormatV(format, vargs); 411 va_end(vargs); 412 return ret; 413 } 414 415 416 PyObject *PyString_Decode(const char *s, 417 Py_ssize_t size, 418 const char *encoding, 419 const char *errors) 420 { 421 PyObject *v, *str; 422 423 str = PyString_FromStringAndSize(s, size); 424 if (str == NULL) 425 return NULL; 426 v = PyString_AsDecodedString(str, encoding, errors); 427 Py_DECREF(str); 428 return v; 429 } 430 431 PyObject *PyString_AsDecodedObject(PyObject *str, 432 const char *encoding, 433 const char *errors) 434 { 435 PyObject *v; 436 437 if (!PyString_Check(str)) { 438 PyErr_BadArgument(); 439 goto onError; 440 } 441 442 if (encoding == NULL) { 443 #ifdef Py_USING_UNICODE 444 encoding = PyUnicode_GetDefaultEncoding(); 445 #else 446 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 447 goto onError; 448 #endif 449 } 450 451 /* Decode via the codec registry */ 452 v = PyCodec_Decode(str, encoding, errors); 453 if (v == NULL) 454 goto onError; 455 456 return v; 457 458 onError: 459 return NULL; 460 } 461 462 PyObject *PyString_AsDecodedString(PyObject *str, 463 const char *encoding, 464 const char *errors) 465 { 466 PyObject *v; 467 468 v = PyString_AsDecodedObject(str, encoding, errors); 469 if (v == NULL) 470 goto onError; 471 472 #ifdef Py_USING_UNICODE 473 /* Convert Unicode to a string using the default encoding */ 474 if (PyUnicode_Check(v)) { 475 PyObject *temp = v; 476 v = PyUnicode_AsEncodedString(v, NULL, NULL); 477 Py_DECREF(temp); 478 if (v == NULL) 479 goto onError; 480 } 481 #endif 482 if (!PyString_Check(v)) { 483 PyErr_Format(PyExc_TypeError, 484 "decoder did not return a string object (type=%.400s)", 485 Py_TYPE(v)->tp_name); 486 Py_DECREF(v); 487 goto onError; 488 } 489 490 return v; 491 492 onError: 493 return NULL; 494 } 495 496 PyObject *PyString_Encode(const char 
*s, 497 Py_ssize_t size, 498 const char *encoding, 499 const char *errors) 500 { 501 PyObject *v, *str; 502 503 str = PyString_FromStringAndSize(s, size); 504 if (str == NULL) 505 return NULL; 506 v = PyString_AsEncodedString(str, encoding, errors); 507 Py_DECREF(str); 508 return v; 509 } 510 511 PyObject *PyString_AsEncodedObject(PyObject *str, 512 const char *encoding, 513 const char *errors) 514 { 515 PyObject *v; 516 517 if (!PyString_Check(str)) { 518 PyErr_BadArgument(); 519 goto onError; 520 } 521 522 if (encoding == NULL) { 523 #ifdef Py_USING_UNICODE 524 encoding = PyUnicode_GetDefaultEncoding(); 525 #else 526 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 527 goto onError; 528 #endif 529 } 530 531 /* Encode via the codec registry */ 532 v = PyCodec_Encode(str, encoding, errors); 533 if (v == NULL) 534 goto onError; 535 536 return v; 537 538 onError: 539 return NULL; 540 } 541 542 PyObject *PyString_AsEncodedString(PyObject *str, 543 const char *encoding, 544 const char *errors) 545 { 546 PyObject *v; 547 548 v = PyString_AsEncodedObject(str, encoding, errors); 549 if (v == NULL) 550 goto onError; 551 552 #ifdef Py_USING_UNICODE 553 /* Convert Unicode to a string using the default encoding */ 554 if (PyUnicode_Check(v)) { 555 PyObject *temp = v; 556 v = PyUnicode_AsEncodedString(v, NULL, NULL); 557 Py_DECREF(temp); 558 if (v == NULL) 559 goto onError; 560 } 561 #endif 562 if (!PyString_Check(v)) { 563 PyErr_Format(PyExc_TypeError, 564 "encoder did not return a string object (type=%.400s)", 565 Py_TYPE(v)->tp_name); 566 Py_DECREF(v); 567 goto onError; 568 } 569 570 return v; 571 572 onError: 573 return NULL; 574 } 575 576 static void 577 string_dealloc(PyObject *op) 578 { 579 switch (PyString_CHECK_INTERNED(op)) { 580 case SSTATE_NOT_INTERNED: 581 break; 582 583 case SSTATE_INTERNED_MORTAL: 584 /* revive dead object temporarily for DelItem */ 585 Py_REFCNT(op) = 3; 586 if (PyDict_DelItem(interned, op) != 0) 587 Py_FatalError( 588 "deletion of 
interned string failed"); 589 break; 590 591 case SSTATE_INTERNED_IMMORTAL: 592 Py_FatalError("Immortal interned string died."); 593 594 default: 595 Py_FatalError("Inconsistent interned string state."); 596 } 597 Py_TYPE(op)->tp_free(op); 598 } 599 600 /* Unescape a backslash-escaped string. If unicode is non-zero, 601 the string is a u-literal. If recode_encoding is non-zero, 602 the string is UTF-8 encoded and should be re-encoded in the 603 specified encoding. */ 604 605 PyObject *PyString_DecodeEscape(const char *s, 606 Py_ssize_t len, 607 const char *errors, 608 Py_ssize_t unicode, 609 const char *recode_encoding) 610 { 611 int c; 612 char *p, *buf; 613 const char *end; 614 PyObject *v; 615 Py_ssize_t newlen = recode_encoding ? 4*len:len; 616 v = PyString_FromStringAndSize((char *)NULL, newlen); 617 if (v == NULL) 618 return NULL; 619 p = buf = PyString_AsString(v); 620 end = s + len; 621 while (s < end) { 622 if (*s != '\\') { 623 non_esc: 624 #ifdef Py_USING_UNICODE 625 if (recode_encoding && (*s & 0x80)) { 626 PyObject *u, *w; 627 char *r; 628 const char* t; 629 Py_ssize_t rn; 630 t = s; 631 /* Decode non-ASCII bytes as UTF-8. */ 632 while (t < end && (*t & 0x80)) t++; 633 u = PyUnicode_DecodeUTF8(s, t - s, errors); 634 if(!u) goto failed; 635 636 /* Recode them in target encoding. */ 637 w = PyUnicode_AsEncodedString( 638 u, recode_encoding, errors); 639 Py_DECREF(u); 640 if (!w) goto failed; 641 642 /* Append bytes to output buffer. */ 643 assert(PyString_Check(w)); 644 r = PyString_AS_STRING(w); 645 rn = PyString_GET_SIZE(w); 646 Py_MEMCPY(p, r, rn); 647 p += rn; 648 Py_DECREF(w); 649 s = t; 650 } else { 651 *p++ = *s++; 652 } 653 #else 654 *p++ = *s++; 655 #endif 656 continue; 657 } 658 s++; 659 if (s==end) { 660 PyErr_SetString(PyExc_ValueError, 661 "Trailing \\ in string"); 662 goto failed; 663 } 664 switch (*s++) { 665 /* XXX This assumes ASCII! 
*/ 666 case '\n': break; 667 case '\\': *p++ = '\\'; break; 668 case '\'': *p++ = '\''; break; 669 case '\"': *p++ = '\"'; break; 670 case 'b': *p++ = '\b'; break; 671 case 'f': *p++ = '\014'; break; /* FF */ 672 case 't': *p++ = '\t'; break; 673 case 'n': *p++ = '\n'; break; 674 case 'r': *p++ = '\r'; break; 675 case 'v': *p++ = '\013'; break; /* VT */ 676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */ 677 case '0': case '1': case '2': case '3': 678 case '4': case '5': case '6': case '7': 679 c = s[-1] - '0'; 680 if (s < end && '0' <= *s && *s <= '7') { 681 c = (c<<3) + *s++ - '0'; 682 if (s < end && '0' <= *s && *s <= '7') 683 c = (c<<3) + *s++ - '0'; 684 } 685 *p++ = c; 686 break; 687 case 'x': 688 if (s+1 < end && 689 isxdigit(Py_CHARMASK(s[0])) && 690 isxdigit(Py_CHARMASK(s[1]))) 691 { 692 unsigned int x = 0; 693 c = Py_CHARMASK(*s); 694 s++; 695 if (isdigit(c)) 696 x = c - '0'; 697 else if (islower(c)) 698 x = 10 + c - 'a'; 699 else 700 x = 10 + c - 'A'; 701 x = x << 4; 702 c = Py_CHARMASK(*s); 703 s++; 704 if (isdigit(c)) 705 x += c - '0'; 706 else if (islower(c)) 707 x += 10 + c - 'a'; 708 else 709 x += 10 + c - 'A'; 710 *p++ = x; 711 break; 712 } 713 if (!errors || strcmp(errors, "strict") == 0) { 714 PyErr_SetString(PyExc_ValueError, 715 "invalid \\x escape"); 716 goto failed; 717 } 718 if (strcmp(errors, "replace") == 0) { 719 *p++ = '?'; 720 } else if (strcmp(errors, "ignore") == 0) 721 /* do nothing */; 722 else { 723 PyErr_Format(PyExc_ValueError, 724 "decoding error; " 725 "unknown error handling code: %.400s", 726 errors); 727 goto failed; 728 } 729 #ifndef Py_USING_UNICODE 730 case 'u': 731 case 'U': 732 case 'N': 733 if (unicode) { 734 PyErr_SetString(PyExc_ValueError, 735 "Unicode escapes not legal " 736 "when Unicode disabled"); 737 goto failed; 738 } 739 #endif 740 default: 741 *p++ = '\\'; 742 s--; 743 goto non_esc; /* an arbitrary number of unescaped 744 UTF-8 bytes may follow. 
*/ 745 } 746 } 747 if (p-buf < newlen && _PyString_Resize(&v, p - buf)) 748 goto failed; 749 return v; 750 failed: 751 Py_DECREF(v); 752 return NULL; 753 } 754 755 /* -------------------------------------------------------------------- */ 756 /* object api */ 757 758 static Py_ssize_t 759 string_getsize(register PyObject *op) 760 { 761 char *s; 762 Py_ssize_t len; 763 if (PyString_AsStringAndSize(op, &s, &len)) 764 return -1; 765 return len; 766 } 767 768 static /*const*/ char * 769 string_getbuffer(register PyObject *op) 770 { 771 char *s; 772 Py_ssize_t len; 773 if (PyString_AsStringAndSize(op, &s, &len)) 774 return NULL; 775 return s; 776 } 777 778 Py_ssize_t 779 PyString_Size(register PyObject *op) 780 { 781 if (!PyString_Check(op)) 782 return string_getsize(op); 783 return Py_SIZE(op); 784 } 785 786 /*const*/ char * 787 PyString_AsString(register PyObject *op) 788 { 789 if (!PyString_Check(op)) 790 return string_getbuffer(op); 791 return ((PyStringObject *)op) -> ob_sval; 792 } 793 794 int 795 PyString_AsStringAndSize(register PyObject *obj, 796 register char **s, 797 register Py_ssize_t *len) 798 { 799 if (s == NULL) { 800 PyErr_BadInternalCall(); 801 return -1; 802 } 803 804 if (!PyString_Check(obj)) { 805 #ifdef Py_USING_UNICODE 806 if (PyUnicode_Check(obj)) { 807 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); 808 if (obj == NULL) 809 return -1; 810 } 811 else 812 #endif 813 { 814 PyErr_Format(PyExc_TypeError, 815 "expected string or Unicode object, " 816 "%.200s found", Py_TYPE(obj)->tp_name); 817 return -1; 818 } 819 } 820 821 *s = PyString_AS_STRING(obj); 822 if (len != NULL) 823 *len = PyString_GET_SIZE(obj); 824 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) { 825 PyErr_SetString(PyExc_TypeError, 826 "expected string without null bytes"); 827 return -1; 828 } 829 return 0; 830 } 831 832 /* -------------------------------------------------------------------- */ 833 /* Methods */ 834 835 #include "stringlib/stringdefs.h" 836 #include 
"stringlib/fastsearch.h" 837 838 #include "stringlib/count.h" 839 #include "stringlib/find.h" 840 #include "stringlib/partition.h" 841 #include "stringlib/split.h" 842 843 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping 844 #include "stringlib/localeutil.h" 845 846 847 848 static int 849 string_print(PyStringObject *op, FILE *fp, int flags) 850 { 851 Py_ssize_t i, str_len; 852 char c; 853 int quote; 854 855 /* XXX Ought to check for interrupts when writing long strings */ 856 if (! PyString_CheckExact(op)) { 857 int ret; 858 /* A str subclass may have its own __str__ method. */ 859 op = (PyStringObject *) PyObject_Str((PyObject *)op); 860 if (op == NULL) 861 return -1; 862 ret = string_print(op, fp, flags); 863 Py_DECREF(op); 864 return ret; 865 } 866 if (flags & Py_PRINT_RAW) { 867 char *data = op->ob_sval; 868 Py_ssize_t size = Py_SIZE(op); 869 Py_BEGIN_ALLOW_THREADS 870 while (size > INT_MAX) { 871 /* Very long strings cannot be written atomically. 872 * But don't write exactly INT_MAX bytes at a time 873 * to avoid memory aligment issues. 874 */ 875 const int chunk_size = INT_MAX & ~0x3FFF; 876 fwrite(data, 1, chunk_size, fp); 877 data += chunk_size; 878 size -= chunk_size; 879 } 880 #ifdef __VMS 881 if (size) fwrite(data, (int)size, 1, fp); 882 #else 883 fwrite(data, 1, (int)size, fp); 884 #endif 885 Py_END_ALLOW_THREADS 886 return 0; 887 } 888 889 /* figure out which quote to use; single is preferred */ 890 quote = '\''; 891 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) && 892 !memchr(op->ob_sval, '"', Py_SIZE(op))) 893 quote = '"'; 894 895 str_len = Py_SIZE(op); 896 Py_BEGIN_ALLOW_THREADS 897 fputc(quote, fp); 898 for (i = 0; i < str_len; i++) { 899 /* Since strings are immutable and the caller should have a 900 reference, accessing the interal buffer should not be an issue 901 with the GIL released. 
*/ 902 c = op->ob_sval[i]; 903 if (c == quote || c == '\\') 904 fprintf(fp, "\\%c", c); 905 else if (c == '\t') 906 fprintf(fp, "\\t"); 907 else if (c == '\n') 908 fprintf(fp, "\\n"); 909 else if (c == '\r') 910 fprintf(fp, "\\r"); 911 else if (c < ' ' || c >= 0x7f) 912 fprintf(fp, "\\x%02x", c & 0xff); 913 else 914 fputc(c, fp); 915 } 916 fputc(quote, fp); 917 Py_END_ALLOW_THREADS 918 return 0; 919 } 920 921 PyObject * 922 PyString_Repr(PyObject *obj, int smartquotes) 923 { 924 register PyStringObject* op = (PyStringObject*) obj; 925 size_t newsize = 2 + 4 * Py_SIZE(op); 926 PyObject *v; 927 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) { 928 PyErr_SetString(PyExc_OverflowError, 929 "string is too large to make repr"); 930 return NULL; 931 } 932 v = PyString_FromStringAndSize((char *)NULL, newsize); 933 if (v == NULL) { 934 return NULL; 935 } 936 else { 937 register Py_ssize_t i; 938 register char c; 939 register char *p; 940 int quote; 941 942 /* figure out which quote to use; single is preferred */ 943 quote = '\''; 944 if (smartquotes && 945 memchr(op->ob_sval, '\'', Py_SIZE(op)) && 946 !memchr(op->ob_sval, '"', Py_SIZE(op))) 947 quote = '"'; 948 949 p = PyString_AS_STRING(v); 950 *p++ = quote; 951 for (i = 0; i < Py_SIZE(op); i++) { 952 /* There's at least enough room for a hex escape 953 and a closing quote. */ 954 assert(newsize - (p - PyString_AS_STRING(v)) >= 5); 955 c = op->ob_sval[i]; 956 if (c == quote || c == '\\') 957 *p++ = '\\', *p++ = c; 958 else if (c == '\t') 959 *p++ = '\\', *p++ = 't'; 960 else if (c == '\n') 961 *p++ = '\\', *p++ = 'n'; 962 else if (c == '\r') 963 *p++ = '\\', *p++ = 'r'; 964 else if (c < ' ' || c >= 0x7f) { 965 /* For performance, we don't want to call 966 PyOS_snprintf here (extra layers of 967 function call). 
*/ 968 sprintf(p, "\\x%02x", c & 0xff); 969 p += 4; 970 } 971 else 972 *p++ = c; 973 } 974 assert(newsize - (p - PyString_AS_STRING(v)) >= 1); 975 *p++ = quote; 976 *p = '\0'; 977 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) 978 return NULL; 979 return v; 980 } 981 } 982 983 static PyObject * 984 string_repr(PyObject *op) 985 { 986 return PyString_Repr(op, 1); 987 } 988 989 static PyObject * 990 string_str(PyObject *s) 991 { 992 assert(PyString_Check(s)); 993 if (PyString_CheckExact(s)) { 994 Py_INCREF(s); 995 return s; 996 } 997 else { 998 /* Subtype -- return genuine string with the same value. */ 999 PyStringObject *t = (PyStringObject *) s; 1000 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t)); 1001 } 1002 } 1003 1004 static Py_ssize_t 1005 string_length(PyStringObject *a) 1006 { 1007 return Py_SIZE(a); 1008 } 1009 1010 static PyObject * 1011 string_concat(register PyStringObject *a, register PyObject *bb) 1012 { 1013 register Py_ssize_t size; 1014 register PyStringObject *op; 1015 if (!PyString_Check(bb)) { 1016 #ifdef Py_USING_UNICODE 1017 if (PyUnicode_Check(bb)) 1018 return PyUnicode_Concat((PyObject *)a, bb); 1019 #endif 1020 if (PyByteArray_Check(bb)) 1021 return PyByteArray_Concat((PyObject *)a, bb); 1022 PyErr_Format(PyExc_TypeError, 1023 "cannot concatenate 'str' and '%.200s' objects", 1024 Py_TYPE(bb)->tp_name); 1025 return NULL; 1026 } 1027 #define b ((PyStringObject *)bb) 1028 /* Optimize cases with empty left or right operand */ 1029 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) && 1030 PyString_CheckExact(a) && PyString_CheckExact(b)) { 1031 if (Py_SIZE(a) == 0) { 1032 Py_INCREF(bb); 1033 return bb; 1034 } 1035 Py_INCREF(a); 1036 return (PyObject *)a; 1037 } 1038 size = Py_SIZE(a) + Py_SIZE(b); 1039 /* Check that string sizes are not negative, to prevent an 1040 overflow in cases where we are passed incorrectly-created 1041 strings with negative lengths (due to a bug in other code). 
1042 */ 1043 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 || 1044 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { 1045 PyErr_SetString(PyExc_OverflowError, 1046 "strings are too large to concat"); 1047 return NULL; 1048 } 1049 1050 /* Inline PyObject_NewVar */ 1051 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { 1052 PyErr_SetString(PyExc_OverflowError, 1053 "strings are too large to concat"); 1054 return NULL; 1055 } 1056 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); 1057 if (op == NULL) 1058 return PyErr_NoMemory(); 1059 PyObject_INIT_VAR(op, &PyString_Type, size); 1060 op->ob_shash = -1; 1061 op->ob_sstate = SSTATE_NOT_INTERNED; 1062 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1063 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b)); 1064 op->ob_sval[size] = '\0'; 1065 return (PyObject *) op; 1066 #undef b 1067 } 1068 1069 static PyObject * 1070 string_repeat(register PyStringObject *a, register Py_ssize_t n) 1071 { 1072 register Py_ssize_t i; 1073 register Py_ssize_t j; 1074 register Py_ssize_t size; 1075 register PyStringObject *op; 1076 size_t nbytes; 1077 if (n < 0) 1078 n = 0; 1079 /* watch out for overflows: the size can overflow int, 1080 * and the # of bytes needed can overflow size_t 1081 */ 1082 size = Py_SIZE(a) * n; 1083 if (n && size / n != Py_SIZE(a)) { 1084 PyErr_SetString(PyExc_OverflowError, 1085 "repeated string is too long"); 1086 return NULL; 1087 } 1088 if (size == Py_SIZE(a) && PyString_CheckExact(a)) { 1089 Py_INCREF(a); 1090 return (PyObject *)a; 1091 } 1092 nbytes = (size_t)size; 1093 if (nbytes + PyStringObject_SIZE <= nbytes) { 1094 PyErr_SetString(PyExc_OverflowError, 1095 "repeated string is too long"); 1096 return NULL; 1097 } 1098 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes); 1099 if (op == NULL) 1100 return PyErr_NoMemory(); 1101 PyObject_INIT_VAR(op, &PyString_Type, size); 1102 op->ob_shash = -1; 1103 op->ob_sstate = SSTATE_NOT_INTERNED; 1104 op->ob_sval[size] = '\0'; 1105 if 
(Py_SIZE(a) == 1 && n > 0) { 1106 memset(op->ob_sval, a->ob_sval[0] , n); 1107 return (PyObject *) op; 1108 } 1109 i = 0; 1110 if (i < size) { 1111 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1112 i = Py_SIZE(a); 1113 } 1114 while (i < size) { 1115 j = (i <= size-i) ? i : size-i; 1116 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); 1117 i += j; 1118 } 1119 return (PyObject *) op; 1120 } 1121 1122 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */ 1123 1124 static PyObject * 1125 string_slice(register PyStringObject *a, register Py_ssize_t i, 1126 register Py_ssize_t j) 1127 /* j -- may be negative! */ 1128 { 1129 if (i < 0) 1130 i = 0; 1131 if (j < 0) 1132 j = 0; /* Avoid signed/unsigned bug in next line */ 1133 if (j > Py_SIZE(a)) 1134 j = Py_SIZE(a); 1135 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) { 1136 /* It's the same as a */ 1137 Py_INCREF(a); 1138 return (PyObject *)a; 1139 } 1140 if (j < i) 1141 j = i; 1142 return PyString_FromStringAndSize(a->ob_sval + i, j-i); 1143 } 1144 1145 static int 1146 string_contains(PyObject *str_obj, PyObject *sub_obj) 1147 { 1148 if (!PyString_CheckExact(sub_obj)) { 1149 #ifdef Py_USING_UNICODE 1150 if (PyUnicode_Check(sub_obj)) 1151 return PyUnicode_Contains(str_obj, sub_obj); 1152 #endif 1153 if (!PyString_Check(sub_obj)) { 1154 PyErr_Format(PyExc_TypeError, 1155 "'in <string>' requires string as left operand, " 1156 "not %.200s", Py_TYPE(sub_obj)->tp_name); 1157 return -1; 1158 } 1159 } 1160 1161 return stringlib_contains_obj(str_obj, sub_obj); 1162 } 1163 1164 static PyObject * 1165 string_item(PyStringObject *a, register Py_ssize_t i) 1166 { 1167 char pchar; 1168 PyObject *v; 1169 if (i < 0 || i >= Py_SIZE(a)) { 1170 PyErr_SetString(PyExc_IndexError, "string index out of range"); 1171 return NULL; 1172 } 1173 pchar = a->ob_sval[i]; 1174 v = (PyObject *)characters[pchar & UCHAR_MAX]; 1175 if (v == NULL) 1176 v = PyString_FromStringAndSize(&pchar, 1); 1177 else { 1178 #ifdef COUNT_ALLOCS 1179 
one_strings++; 1180 #endif 1181 Py_INCREF(v); 1182 } 1183 return v; 1184 } 1185 1186 static PyObject* 1187 string_richcompare(PyStringObject *a, PyStringObject *b, int op) 1188 { 1189 int c; 1190 Py_ssize_t len_a, len_b; 1191 Py_ssize_t min_len; 1192 PyObject *result; 1193 1194 /* Make sure both arguments are strings. */ 1195 if (!(PyString_Check(a) && PyString_Check(b))) { 1196 result = Py_NotImplemented; 1197 goto out; 1198 } 1199 if (a == b) { 1200 switch (op) { 1201 case Py_EQ:case Py_LE:case Py_GE: 1202 result = Py_True; 1203 goto out; 1204 case Py_NE:case Py_LT:case Py_GT: 1205 result = Py_False; 1206 goto out; 1207 } 1208 } 1209 if (op == Py_EQ) { 1210 /* Supporting Py_NE here as well does not save 1211 much time, since Py_NE is rarely used. */ 1212 if (Py_SIZE(a) == Py_SIZE(b) 1213 && (a->ob_sval[0] == b->ob_sval[0] 1214 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) { 1215 result = Py_True; 1216 } else { 1217 result = Py_False; 1218 } 1219 goto out; 1220 } 1221 len_a = Py_SIZE(a); len_b = Py_SIZE(b); 1222 min_len = (len_a < len_b) ? len_a : len_b; 1223 if (min_len > 0) { 1224 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); 1225 if (c==0) 1226 c = memcmp(a->ob_sval, b->ob_sval, min_len); 1227 } else 1228 c = 0; 1229 if (c == 0) 1230 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; 1231 switch (op) { 1232 case Py_LT: c = c < 0; break; 1233 case Py_LE: c = c <= 0; break; 1234 case Py_EQ: assert(0); break; /* unreachable */ 1235 case Py_NE: c = c != 0; break; 1236 case Py_GT: c = c > 0; break; 1237 case Py_GE: c = c >= 0; break; 1238 default: 1239 result = Py_NotImplemented; 1240 goto out; 1241 } 1242 result = c ? 
Py_True : Py_False; 1243 out: 1244 Py_INCREF(result); 1245 return result; 1246 } 1247 1248 int 1249 _PyString_Eq(PyObject *o1, PyObject *o2) 1250 { 1251 PyStringObject *a = (PyStringObject*) o1; 1252 PyStringObject *b = (PyStringObject*) o2; 1253 return Py_SIZE(a) == Py_SIZE(b) 1254 && *a->ob_sval == *b->ob_sval 1255 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0; 1256 } 1257 1258 static long 1259 string_hash(PyStringObject *a) 1260 { 1261 register Py_ssize_t len; 1262 register unsigned char *p; 1263 register long x; 1264 1265 if (a->ob_shash != -1) 1266 return a->ob_shash; 1267 len = Py_SIZE(a); 1268 p = (unsigned char *) a->ob_sval; 1269 x = *p << 7; 1270 while (--len >= 0) 1271 x = (1000003*x) ^ *p++; 1272 x ^= Py_SIZE(a); 1273 if (x == -1) 1274 x = -2; 1275 a->ob_shash = x; 1276 return x; 1277 } 1278 1279 static PyObject* 1280 string_subscript(PyStringObject* self, PyObject* item) 1281 { 1282 if (PyIndex_Check(item)) { 1283 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1284 if (i == -1 && PyErr_Occurred()) 1285 return NULL; 1286 if (i < 0) 1287 i += PyString_GET_SIZE(self); 1288 return string_item(self, i); 1289 } 1290 else if (PySlice_Check(item)) { 1291 Py_ssize_t start, stop, step, slicelength, cur, i; 1292 char* source_buf; 1293 char* result_buf; 1294 PyObject* result; 1295 1296 if (PySlice_GetIndicesEx((PySliceObject*)item, 1297 PyString_GET_SIZE(self), 1298 &start, &stop, &step, &slicelength) < 0) { 1299 return NULL; 1300 } 1301 1302 if (slicelength <= 0) { 1303 return PyString_FromStringAndSize("", 0); 1304 } 1305 else if (start == 0 && step == 1 && 1306 slicelength == PyString_GET_SIZE(self) && 1307 PyString_CheckExact(self)) { 1308 Py_INCREF(self); 1309 return (PyObject *)self; 1310 } 1311 else if (step == 1) { 1312 return PyString_FromStringAndSize( 1313 PyString_AS_STRING(self) + start, 1314 slicelength); 1315 } 1316 else { 1317 source_buf = PyString_AsString((PyObject*)self); 1318 result_buf = (char *)PyMem_Malloc(slicelength); 1319 
if (result_buf == NULL) 1320 return PyErr_NoMemory(); 1321 1322 for (cur = start, i = 0; i < slicelength; 1323 cur += step, i++) { 1324 result_buf[i] = source_buf[cur]; 1325 } 1326 1327 result = PyString_FromStringAndSize(result_buf, 1328 slicelength); 1329 PyMem_Free(result_buf); 1330 return result; 1331 } 1332 } 1333 else { 1334 PyErr_Format(PyExc_TypeError, 1335 "string indices must be integers, not %.200s", 1336 Py_TYPE(item)->tp_name); 1337 return NULL; 1338 } 1339 } 1340 1341 static Py_ssize_t 1342 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr) 1343 { 1344 if ( index != 0 ) { 1345 PyErr_SetString(PyExc_SystemError, 1346 "accessing non-existent string segment"); 1347 return -1; 1348 } 1349 *ptr = (void *)self->ob_sval; 1350 return Py_SIZE(self); 1351 } 1352 1353 static Py_ssize_t 1354 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr) 1355 { 1356 PyErr_SetString(PyExc_TypeError, 1357 "Cannot use string as modifiable buffer"); 1358 return -1; 1359 } 1360 1361 static Py_ssize_t 1362 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp) 1363 { 1364 if ( lenp ) 1365 *lenp = Py_SIZE(self); 1366 return 1; 1367 } 1368 1369 static Py_ssize_t 1370 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr) 1371 { 1372 if ( index != 0 ) { 1373 PyErr_SetString(PyExc_SystemError, 1374 "accessing non-existent string segment"); 1375 return -1; 1376 } 1377 *ptr = self->ob_sval; 1378 return Py_SIZE(self); 1379 } 1380 1381 static int 1382 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags) 1383 { 1384 return PyBuffer_FillInfo(view, (PyObject*)self, 1385 (void *)self->ob_sval, Py_SIZE(self), 1386 1, flags); 1387 } 1388 1389 static PySequenceMethods string_as_sequence = { 1390 (lenfunc)string_length, /*sq_length*/ 1391 (binaryfunc)string_concat, /*sq_concat*/ 1392 (ssizeargfunc)string_repeat, /*sq_repeat*/ 1393 (ssizeargfunc)string_item, /*sq_item*/ 1394 
(ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};

/* Mapping protocol table: length and subscripting only.  The third slot
   is 0, so no subscript-assignment handler is installed here. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};

/* Buffer protocol table wiring the string_buffer_* handlers. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */ /* NOTE(review): presumably the release-buffer slot -- confirm */
};



/* Selectors for the strip family; also used as indexes into stripformat. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry is a PyArg_ParseTuple format: one optional object, followed
   by ':' and the method name used in error messages. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip type i: skips the 3-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)

PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string. If maxsplit is given, at most maxsplit\n\
splits are done. \
If sep is not specified or is None, any\n\ 1432 whitespace string is a separator and empty strings are removed\n\ 1433 from the result."); 1434 1435 static PyObject * 1436 string_split(PyStringObject *self, PyObject *args) 1437 { 1438 Py_ssize_t len = PyString_GET_SIZE(self), n; 1439 Py_ssize_t maxsplit = -1; 1440 const char *s = PyString_AS_STRING(self), *sub; 1441 PyObject *subobj = Py_None; 1442 1443 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) 1444 return NULL; 1445 if (maxsplit < 0) 1446 maxsplit = PY_SSIZE_T_MAX; 1447 if (subobj == Py_None) 1448 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); 1449 if (PyString_Check(subobj)) { 1450 sub = PyString_AS_STRING(subobj); 1451 n = PyString_GET_SIZE(subobj); 1452 } 1453 #ifdef Py_USING_UNICODE 1454 else if (PyUnicode_Check(subobj)) 1455 return PyUnicode_Split((PyObject *)self, subobj, maxsplit); 1456 #endif 1457 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1458 return NULL; 1459 1460 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit); 1461 } 1462 1463 PyDoc_STRVAR(partition__doc__, 1464 "S.partition(sep) -> (head, sep, tail)\n\ 1465 \n\ 1466 Search for the separator sep in S, and return the part before it,\n\ 1467 the separator itself, and the part after it. 
If the separator is not\n\ 1468 found, return S and two empty strings."); 1469 1470 static PyObject * 1471 string_partition(PyStringObject *self, PyObject *sep_obj) 1472 { 1473 const char *sep; 1474 Py_ssize_t sep_len; 1475 1476 if (PyString_Check(sep_obj)) { 1477 sep = PyString_AS_STRING(sep_obj); 1478 sep_len = PyString_GET_SIZE(sep_obj); 1479 } 1480 #ifdef Py_USING_UNICODE 1481 else if (PyUnicode_Check(sep_obj)) 1482 return PyUnicode_Partition((PyObject *) self, sep_obj); 1483 #endif 1484 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) 1485 return NULL; 1486 1487 return stringlib_partition( 1488 (PyObject*) self, 1489 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1490 sep_obj, sep, sep_len 1491 ); 1492 } 1493 1494 PyDoc_STRVAR(rpartition__doc__, 1495 "S.rpartition(sep) -> (head, sep, tail)\n\ 1496 \n\ 1497 Search for the separator sep in S, starting at the end of S, and return\n\ 1498 the part before it, the separator itself, and the part after it. If the\n\ 1499 separator is not found, return two empty strings and S."); 1500 1501 static PyObject * 1502 string_rpartition(PyStringObject *self, PyObject *sep_obj) 1503 { 1504 const char *sep; 1505 Py_ssize_t sep_len; 1506 1507 if (PyString_Check(sep_obj)) { 1508 sep = PyString_AS_STRING(sep_obj); 1509 sep_len = PyString_GET_SIZE(sep_obj); 1510 } 1511 #ifdef Py_USING_UNICODE 1512 else if (PyUnicode_Check(sep_obj)) 1513 return PyUnicode_RPartition((PyObject *) self, sep_obj); 1514 #endif 1515 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) 1516 return NULL; 1517 1518 return stringlib_rpartition( 1519 (PyObject*) self, 1520 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1521 sep_obj, sep, sep_len 1522 ); 1523 } 1524 1525 PyDoc_STRVAR(rsplit__doc__, 1526 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\ 1527 \n\ 1528 Return a list of the words in the string S, using sep as the\n\ 1529 delimiter string, starting at the end of the string and working\n\ 1530 to the front. 
If maxsplit is given, at most maxsplit splits are\n\ 1531 done. If sep is not specified or is None, any whitespace string\n\ 1532 is a separator."); 1533 1534 static PyObject * 1535 string_rsplit(PyStringObject *self, PyObject *args) 1536 { 1537 Py_ssize_t len = PyString_GET_SIZE(self), n; 1538 Py_ssize_t maxsplit = -1; 1539 const char *s = PyString_AS_STRING(self), *sub; 1540 PyObject *subobj = Py_None; 1541 1542 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) 1543 return NULL; 1544 if (maxsplit < 0) 1545 maxsplit = PY_SSIZE_T_MAX; 1546 if (subobj == Py_None) 1547 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); 1548 if (PyString_Check(subobj)) { 1549 sub = PyString_AS_STRING(subobj); 1550 n = PyString_GET_SIZE(subobj); 1551 } 1552 #ifdef Py_USING_UNICODE 1553 else if (PyUnicode_Check(subobj)) 1554 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit); 1555 #endif 1556 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1557 return NULL; 1558 1559 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit); 1560 } 1561 1562 1563 PyDoc_STRVAR(join__doc__, 1564 "S.join(iterable) -> string\n\ 1565 \n\ 1566 Return a string which is the concatenation of the strings in the\n\ 1567 iterable. 
The separator between elements is S."); 1568 1569 static PyObject * 1570 string_join(PyStringObject *self, PyObject *orig) 1571 { 1572 char *sep = PyString_AS_STRING(self); 1573 const Py_ssize_t seplen = PyString_GET_SIZE(self); 1574 PyObject *res = NULL; 1575 char *p; 1576 Py_ssize_t seqlen = 0; 1577 size_t sz = 0; 1578 Py_ssize_t i; 1579 PyObject *seq, *item; 1580 1581 seq = PySequence_Fast(orig, ""); 1582 if (seq == NULL) { 1583 return NULL; 1584 } 1585 1586 seqlen = PySequence_Size(seq); 1587 if (seqlen == 0) { 1588 Py_DECREF(seq); 1589 return PyString_FromString(""); 1590 } 1591 if (seqlen == 1) { 1592 item = PySequence_Fast_GET_ITEM(seq, 0); 1593 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) { 1594 Py_INCREF(item); 1595 Py_DECREF(seq); 1596 return item; 1597 } 1598 } 1599 1600 /* There are at least two things to join, or else we have a subclass 1601 * of the builtin types in the sequence. 1602 * Do a pre-pass to figure out the total amount of space we'll 1603 * need (sz), see whether any argument is absurd, and defer to 1604 * the Unicode join if appropriate. 1605 */ 1606 for (i = 0; i < seqlen; i++) { 1607 const size_t old_sz = sz; 1608 item = PySequence_Fast_GET_ITEM(seq, i); 1609 if (!PyString_Check(item)){ 1610 #ifdef Py_USING_UNICODE 1611 if (PyUnicode_Check(item)) { 1612 /* Defer to Unicode join. 1613 * CAUTION: There's no gurantee that the 1614 * original sequence can be iterated over 1615 * again, so we must pass seq here. 
1616 */ 1617 PyObject *result; 1618 result = PyUnicode_Join((PyObject *)self, seq); 1619 Py_DECREF(seq); 1620 return result; 1621 } 1622 #endif 1623 PyErr_Format(PyExc_TypeError, 1624 "sequence item %zd: expected string," 1625 " %.80s found", 1626 i, Py_TYPE(item)->tp_name); 1627 Py_DECREF(seq); 1628 return NULL; 1629 } 1630 sz += PyString_GET_SIZE(item); 1631 if (i != 0) 1632 sz += seplen; 1633 if (sz < old_sz || sz > PY_SSIZE_T_MAX) { 1634 PyErr_SetString(PyExc_OverflowError, 1635 "join() result is too long for a Python string"); 1636 Py_DECREF(seq); 1637 return NULL; 1638 } 1639 } 1640 1641 /* Allocate result space. */ 1642 res = PyString_FromStringAndSize((char*)NULL, sz); 1643 if (res == NULL) { 1644 Py_DECREF(seq); 1645 return NULL; 1646 } 1647 1648 /* Catenate everything. */ 1649 p = PyString_AS_STRING(res); 1650 for (i = 0; i < seqlen; ++i) { 1651 size_t n; 1652 item = PySequence_Fast_GET_ITEM(seq, i); 1653 n = PyString_GET_SIZE(item); 1654 Py_MEMCPY(p, PyString_AS_STRING(item), n); 1655 p += n; 1656 if (i < seqlen - 1) { 1657 Py_MEMCPY(p, sep, seplen); 1658 p += seplen; 1659 } 1660 } 1661 1662 Py_DECREF(seq); 1663 return res; 1664 } 1665 1666 PyObject * 1667 _PyString_Join(PyObject *sep, PyObject *x) 1668 { 1669 assert(sep != NULL && PyString_Check(sep)); 1670 assert(x != NULL); 1671 return string_join((PyStringObject *)sep, x); 1672 } 1673 1674 /* helper macro to fixup start/end slice values */ 1675 #define ADJUST_INDICES(start, end, len) \ 1676 if (end > len) \ 1677 end = len; \ 1678 else if (end < 0) { \ 1679 end += len; \ 1680 if (end < 0) \ 1681 end = 0; \ 1682 } \ 1683 if (start < 0) { \ 1684 start += len; \ 1685 if (start < 0) \ 1686 start = 0; \ 1687 } 1688 1689 Py_LOCAL_INLINE(Py_ssize_t) 1690 string_find_internal(PyStringObject *self, PyObject *args, int dir) 1691 { 1692 PyObject *subobj; 1693 const char *sub; 1694 Py_ssize_t sub_len; 1695 Py_ssize_t start=0, end=PY_SSIZE_T_MAX; 1696 1697 if (!stringlib_parse_args_finds("find/rfind/index/rindex", 
1698 args, &subobj, &start, &end)) 1699 return -2; 1700 1701 if (PyString_Check(subobj)) { 1702 sub = PyString_AS_STRING(subobj); 1703 sub_len = PyString_GET_SIZE(subobj); 1704 } 1705 #ifdef Py_USING_UNICODE 1706 else if (PyUnicode_Check(subobj)) 1707 return PyUnicode_Find( 1708 (PyObject *)self, subobj, start, end, dir); 1709 #endif 1710 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) 1711 /* XXX - the "expected a character buffer object" is pretty 1712 confusing for a non-expert. remap to something else ? */ 1713 return -2; 1714 1715 if (dir > 0) 1716 return stringlib_find_slice( 1717 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1718 sub, sub_len, start, end); 1719 else 1720 return stringlib_rfind_slice( 1721 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1722 sub, sub_len, start, end); 1723 } 1724 1725 1726 PyDoc_STRVAR(find__doc__, 1727 "S.find(sub [,start [,end]]) -> int\n\ 1728 \n\ 1729 Return the lowest index in S where substring sub is found,\n\ 1730 such that sub is contained within s[start:end]. 
Optional\n\ 1731 arguments start and end are interpreted as in slice notation.\n\ 1732 \n\ 1733 Return -1 on failure."); 1734 1735 static PyObject * 1736 string_find(PyStringObject *self, PyObject *args) 1737 { 1738 Py_ssize_t result = string_find_internal(self, args, +1); 1739 if (result == -2) 1740 return NULL; 1741 return PyInt_FromSsize_t(result); 1742 } 1743 1744 1745 PyDoc_STRVAR(index__doc__, 1746 "S.index(sub [,start [,end]]) -> int\n\ 1747 \n\ 1748 Like S.find() but raise ValueError when the substring is not found."); 1749 1750 static PyObject * 1751 string_index(PyStringObject *self, PyObject *args) 1752 { 1753 Py_ssize_t result = string_find_internal(self, args, +1); 1754 if (result == -2) 1755 return NULL; 1756 if (result == -1) { 1757 PyErr_SetString(PyExc_ValueError, 1758 "substring not found"); 1759 return NULL; 1760 } 1761 return PyInt_FromSsize_t(result); 1762 } 1763 1764 1765 PyDoc_STRVAR(rfind__doc__, 1766 "S.rfind(sub [,start [,end]]) -> int\n\ 1767 \n\ 1768 Return the highest index in S where substring sub is found,\n\ 1769 such that sub is contained within s[start:end]. 
Optional\n\ 1770 arguments start and end are interpreted as in slice notation.\n\ 1771 \n\ 1772 Return -1 on failure."); 1773 1774 static PyObject * 1775 string_rfind(PyStringObject *self, PyObject *args) 1776 { 1777 Py_ssize_t result = string_find_internal(self, args, -1); 1778 if (result == -2) 1779 return NULL; 1780 return PyInt_FromSsize_t(result); 1781 } 1782 1783 1784 PyDoc_STRVAR(rindex__doc__, 1785 "S.rindex(sub [,start [,end]]) -> int\n\ 1786 \n\ 1787 Like S.rfind() but raise ValueError when the substring is not found."); 1788 1789 static PyObject * 1790 string_rindex(PyStringObject *self, PyObject *args) 1791 { 1792 Py_ssize_t result = string_find_internal(self, args, -1); 1793 if (result == -2) 1794 return NULL; 1795 if (result == -1) { 1796 PyErr_SetString(PyExc_ValueError, 1797 "substring not found"); 1798 return NULL; 1799 } 1800 return PyInt_FromSsize_t(result); 1801 } 1802 1803 1804 Py_LOCAL_INLINE(PyObject *) 1805 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) 1806 { 1807 char *s = PyString_AS_STRING(self); 1808 Py_ssize_t len = PyString_GET_SIZE(self); 1809 char *sep = PyString_AS_STRING(sepobj); 1810 Py_ssize_t seplen = PyString_GET_SIZE(sepobj); 1811 Py_ssize_t i, j; 1812 1813 i = 0; 1814 if (striptype != RIGHTSTRIP) { 1815 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { 1816 i++; 1817 } 1818 } 1819 1820 j = len; 1821 if (striptype != LEFTSTRIP) { 1822 do { 1823 j--; 1824 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen)); 1825 j++; 1826 } 1827 1828 if (i == 0 && j == len && PyString_CheckExact(self)) { 1829 Py_INCREF(self); 1830 return (PyObject*)self; 1831 } 1832 else 1833 return PyString_FromStringAndSize(s+i, j-i); 1834 } 1835 1836 1837 Py_LOCAL_INLINE(PyObject *) 1838 do_strip(PyStringObject *self, int striptype) 1839 { 1840 char *s = PyString_AS_STRING(self); 1841 Py_ssize_t len = PyString_GET_SIZE(self), i, j; 1842 1843 i = 0; 1844 if (striptype != RIGHTSTRIP) { 1845 while (i < len && 
isspace(Py_CHARMASK(s[i]))) { 1846 i++; 1847 } 1848 } 1849 1850 j = len; 1851 if (striptype != LEFTSTRIP) { 1852 do { 1853 j--; 1854 } while (j >= i && isspace(Py_CHARMASK(s[j]))); 1855 j++; 1856 } 1857 1858 if (i == 0 && j == len && PyString_CheckExact(self)) { 1859 Py_INCREF(self); 1860 return (PyObject*)self; 1861 } 1862 else 1863 return PyString_FromStringAndSize(s+i, j-i); 1864 } 1865 1866 1867 Py_LOCAL_INLINE(PyObject *) 1868 do_argstrip(PyStringObject *self, int striptype, PyObject *args) 1869 { 1870 PyObject *sep = NULL; 1871 1872 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) 1873 return NULL; 1874 1875 if (sep != NULL && sep != Py_None) { 1876 if (PyString_Check(sep)) 1877 return do_xstrip(self, striptype, sep); 1878 #ifdef Py_USING_UNICODE 1879 else if (PyUnicode_Check(sep)) { 1880 PyObject *uniself = PyUnicode_FromObject((PyObject *)self); 1881 PyObject *res; 1882 if (uniself==NULL) 1883 return NULL; 1884 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself, 1885 striptype, sep); 1886 Py_DECREF(uniself); 1887 return res; 1888 } 1889 #endif 1890 PyErr_Format(PyExc_TypeError, 1891 #ifdef Py_USING_UNICODE 1892 "%s arg must be None, str or unicode", 1893 #else 1894 "%s arg must be None or str", 1895 #endif 1896 STRIPNAME(striptype)); 1897 return NULL; 1898 } 1899 1900 return do_strip(self, striptype); 1901 } 1902 1903 1904 PyDoc_STRVAR(strip__doc__, 1905 "S.strip([chars]) -> string or unicode\n\ 1906 \n\ 1907 Return a copy of the string S with leading and trailing\n\ 1908 whitespace removed.\n\ 1909 If chars is given and not None, remove characters in chars instead.\n\ 1910 If chars is unicode, S will be converted to unicode before stripping"); 1911 1912 static PyObject * 1913 string_strip(PyStringObject *self, PyObject *args) 1914 { 1915 if (PyTuple_GET_SIZE(args) == 0) 1916 return do_strip(self, BOTHSTRIP); /* Common case */ 1917 else 1918 return do_argstrip(self, BOTHSTRIP, args); 1919 } 1920 1921 1922 PyDoc_STRVAR(lstrip__doc__, 1923 
"S.lstrip([chars]) -> string or unicode\n\ 1924 \n\ 1925 Return a copy of the string S with leading whitespace removed.\n\ 1926 If chars is given and not None, remove characters in chars instead.\n\ 1927 If chars is unicode, S will be converted to unicode before stripping"); 1928 1929 static PyObject * 1930 string_lstrip(PyStringObject *self, PyObject *args) 1931 { 1932 if (PyTuple_GET_SIZE(args) == 0) 1933 return do_strip(self, LEFTSTRIP); /* Common case */ 1934 else 1935 return do_argstrip(self, LEFTSTRIP, args); 1936 } 1937 1938 1939 PyDoc_STRVAR(rstrip__doc__, 1940 "S.rstrip([chars]) -> string or unicode\n\ 1941 \n\ 1942 Return a copy of the string S with trailing whitespace removed.\n\ 1943 If chars is given and not None, remove characters in chars instead.\n\ 1944 If chars is unicode, S will be converted to unicode before stripping"); 1945 1946 static PyObject * 1947 string_rstrip(PyStringObject *self, PyObject *args) 1948 { 1949 if (PyTuple_GET_SIZE(args) == 0) 1950 return do_strip(self, RIGHTSTRIP); /* Common case */ 1951 else 1952 return do_argstrip(self, RIGHTSTRIP, args); 1953 } 1954 1955 1956 PyDoc_STRVAR(lower__doc__, 1957 "S.lower() -> string\n\ 1958 \n\ 1959 Return a copy of the string S converted to lowercase."); 1960 1961 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */ 1962 #ifndef _tolower 1963 #define _tolower tolower 1964 #endif 1965 1966 static PyObject * 1967 string_lower(PyStringObject *self) 1968 { 1969 char *s; 1970 Py_ssize_t i, n = PyString_GET_SIZE(self); 1971 PyObject *newobj; 1972 1973 newobj = PyString_FromStringAndSize(NULL, n); 1974 if (!newobj) 1975 return NULL; 1976 1977 s = PyString_AS_STRING(newobj); 1978 1979 Py_MEMCPY(s, PyString_AS_STRING(self), n); 1980 1981 for (i = 0; i < n; i++) { 1982 int c = Py_CHARMASK(s[i]); 1983 if (isupper(c)) 1984 s[i] = _tolower(c); 1985 } 1986 1987 return newobj; 1988 } 1989 1990 PyDoc_STRVAR(upper__doc__, 1991 "S.upper() -> string\n\ 1992 \n\ 1993 Return a copy of the 
string S converted to uppercase."); 1994 1995 #ifndef _toupper 1996 #define _toupper toupper 1997 #endif 1998 1999 static PyObject * 2000 string_upper(PyStringObject *self) 2001 { 2002 char *s; 2003 Py_ssize_t i, n = PyString_GET_SIZE(self); 2004 PyObject *newobj; 2005 2006 newobj = PyString_FromStringAndSize(NULL, n); 2007 if (!newobj) 2008 return NULL; 2009 2010 s = PyString_AS_STRING(newobj); 2011 2012 Py_MEMCPY(s, PyString_AS_STRING(self), n); 2013 2014 for (i = 0; i < n; i++) { 2015 int c = Py_CHARMASK(s[i]); 2016 if (islower(c)) 2017 s[i] = _toupper(c); 2018 } 2019 2020 return newobj; 2021 } 2022 2023 PyDoc_STRVAR(title__doc__, 2024 "S.title() -> string\n\ 2025 \n\ 2026 Return a titlecased version of S, i.e. words start with uppercase\n\ 2027 characters, all remaining cased characters have lowercase."); 2028 2029 static PyObject* 2030 string_title(PyStringObject *self) 2031 { 2032 char *s = PyString_AS_STRING(self), *s_new; 2033 Py_ssize_t i, n = PyString_GET_SIZE(self); 2034 int previous_is_cased = 0; 2035 PyObject *newobj; 2036 2037 newobj = PyString_FromStringAndSize(NULL, n); 2038 if (newobj == NULL) 2039 return NULL; 2040 s_new = PyString_AsString(newobj); 2041 for (i = 0; i < n; i++) { 2042 int c = Py_CHARMASK(*s++); 2043 if (islower(c)) { 2044 if (!previous_is_cased) 2045 c = toupper(c); 2046 previous_is_cased = 1; 2047 } else if (isupper(c)) { 2048 if (previous_is_cased) 2049 c = tolower(c); 2050 previous_is_cased = 1; 2051 } else 2052 previous_is_cased = 0; 2053 *s_new++ = c; 2054 } 2055 return newobj; 2056 } 2057 2058 PyDoc_STRVAR(capitalize__doc__, 2059 "S.capitalize() -> string\n\ 2060 \n\ 2061 Return a copy of the string S with only its first character\n\ 2062 capitalized."); 2063 2064 static PyObject * 2065 string_capitalize(PyStringObject *self) 2066 { 2067 char *s = PyString_AS_STRING(self), *s_new; 2068 Py_ssize_t i, n = PyString_GET_SIZE(self); 2069 PyObject *newobj; 2070 2071 newobj = PyString_FromStringAndSize(NULL, n); 2072 if (newobj == 
NULL) 2073 return NULL; 2074 s_new = PyString_AsString(newobj); 2075 if (0 < n) { 2076 int c = Py_CHARMASK(*s++); 2077 if (islower(c)) 2078 *s_new = toupper(c); 2079 else 2080 *s_new = c; 2081 s_new++; 2082 } 2083 for (i = 1; i < n; i++) { 2084 int c = Py_CHARMASK(*s++); 2085 if (isupper(c)) 2086 *s_new = tolower(c); 2087 else 2088 *s_new = c; 2089 s_new++; 2090 } 2091 return newobj; 2092 } 2093 2094 2095 PyDoc_STRVAR(count__doc__, 2096 "S.count(sub[, start[, end]]) -> int\n\ 2097 \n\ 2098 Return the number of non-overlapping occurrences of substring sub in\n\ 2099 string S[start:end]. Optional arguments start and end are interpreted\n\ 2100 as in slice notation."); 2101 2102 static PyObject * 2103 string_count(PyStringObject *self, PyObject *args) 2104 { 2105 PyObject *sub_obj; 2106 const char *str = PyString_AS_STRING(self), *sub; 2107 Py_ssize_t sub_len; 2108 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; 2109 2110 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end)) 2111 return NULL; 2112 2113 if (PyString_Check(sub_obj)) { 2114 sub = PyString_AS_STRING(sub_obj); 2115 sub_len = PyString_GET_SIZE(sub_obj); 2116 } 2117 #ifdef Py_USING_UNICODE 2118 else if (PyUnicode_Check(sub_obj)) { 2119 Py_ssize_t count; 2120 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end); 2121 if (count == -1) 2122 return NULL; 2123 else 2124 return PyInt_FromSsize_t(count); 2125 } 2126 #endif 2127 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) 2128 return NULL; 2129 2130 ADJUST_INDICES(start, end, PyString_GET_SIZE(self)); 2131 2132 return PyInt_FromSsize_t( 2133 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) 2134 ); 2135 } 2136 2137 PyDoc_STRVAR(swapcase__doc__, 2138 "S.swapcase() -> string\n\ 2139 \n\ 2140 Return a copy of the string S with uppercase characters\n\ 2141 converted to lowercase and vice versa."); 2142 2143 static PyObject * 2144 string_swapcase(PyStringObject *self) 2145 { 2146 char *s = 
PyString_AS_STRING(self), *s_new; 2147 Py_ssize_t i, n = PyString_GET_SIZE(self); 2148 PyObject *newobj; 2149 2150 newobj = PyString_FromStringAndSize(NULL, n); 2151 if (newobj == NULL) 2152 return NULL; 2153 s_new = PyString_AsString(newobj); 2154 for (i = 0; i < n; i++) { 2155 int c = Py_CHARMASK(*s++); 2156 if (islower(c)) { 2157 *s_new = toupper(c); 2158 } 2159 else if (isupper(c)) { 2160 *s_new = tolower(c); 2161 } 2162 else 2163 *s_new = c; 2164 s_new++; 2165 } 2166 return newobj; 2167 } 2168 2169 2170 PyDoc_STRVAR(translate__doc__, 2171 "S.translate(table [,deletechars]) -> string\n\ 2172 \n\ 2173 Return a copy of the string S, where all characters occurring\n\ 2174 in the optional argument deletechars are removed, and the\n\ 2175 remaining characters have been mapped through the given\n\ 2176 translation table, which must be a string of length 256."); 2177 2178 static PyObject * 2179 string_translate(PyStringObject *self, PyObject *args) 2180 { 2181 register char *input, *output; 2182 const char *table; 2183 register Py_ssize_t i, c, changed = 0; 2184 PyObject *input_obj = (PyObject*)self; 2185 const char *output_start, *del_table=NULL; 2186 Py_ssize_t inlen, tablen, dellen = 0; 2187 PyObject *result; 2188 int trans_table[256]; 2189 PyObject *tableobj, *delobj = NULL; 2190 2191 if (!PyArg_UnpackTuple(args, "translate", 1, 2, 2192 &tableobj, &delobj)) 2193 return NULL; 2194 2195 if (PyString_Check(tableobj)) { 2196 table = PyString_AS_STRING(tableobj); 2197 tablen = PyString_GET_SIZE(tableobj); 2198 } 2199 else if (tableobj == Py_None) { 2200 table = NULL; 2201 tablen = 256; 2202 } 2203 #ifdef Py_USING_UNICODE 2204 else if (PyUnicode_Check(tableobj)) { 2205 /* Unicode .translate() does not support the deletechars 2206 parameter; instead a mapping to None will cause characters 2207 to be deleted. 
*/ 2208 if (delobj != NULL) { 2209 PyErr_SetString(PyExc_TypeError, 2210 "deletions are implemented differently for unicode"); 2211 return NULL; 2212 } 2213 return PyUnicode_Translate((PyObject *)self, tableobj, NULL); 2214 } 2215 #endif 2216 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen)) 2217 return NULL; 2218 2219 if (tablen != 256) { 2220 PyErr_SetString(PyExc_ValueError, 2221 "translation table must be 256 characters long"); 2222 return NULL; 2223 } 2224 2225 if (delobj != NULL) { 2226 if (PyString_Check(delobj)) { 2227 del_table = PyString_AS_STRING(delobj); 2228 dellen = PyString_GET_SIZE(delobj); 2229 } 2230 #ifdef Py_USING_UNICODE 2231 else if (PyUnicode_Check(delobj)) { 2232 PyErr_SetString(PyExc_TypeError, 2233 "deletions are implemented differently for unicode"); 2234 return NULL; 2235 } 2236 #endif 2237 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) 2238 return NULL; 2239 } 2240 else { 2241 del_table = NULL; 2242 dellen = 0; 2243 } 2244 2245 inlen = PyString_GET_SIZE(input_obj); 2246 result = PyString_FromStringAndSize((char *)NULL, inlen); 2247 if (result == NULL) 2248 return NULL; 2249 output_start = output = PyString_AsString(result); 2250 input = PyString_AS_STRING(input_obj); 2251 2252 if (dellen == 0 && table != NULL) { 2253 /* If no deletions are required, use faster code */ 2254 for (i = inlen; --i >= 0; ) { 2255 c = Py_CHARMASK(*input++); 2256 if (Py_CHARMASK((*output++ = table[c])) != c) 2257 changed = 1; 2258 } 2259 if (changed || !PyString_CheckExact(input_obj)) 2260 return result; 2261 Py_DECREF(result); 2262 Py_INCREF(input_obj); 2263 return input_obj; 2264 } 2265 2266 if (table == NULL) { 2267 for (i = 0; i < 256; i++) 2268 trans_table[i] = Py_CHARMASK(i); 2269 } else { 2270 for (i = 0; i < 256; i++) 2271 trans_table[i] = Py_CHARMASK(table[i]); 2272 } 2273 2274 for (i = 0; i < dellen; i++) 2275 trans_table[(int) Py_CHARMASK(del_table[i])] = -1; 2276 2277 for (i = inlen; --i >= 0; ) { 2278 c = 
Py_CHARMASK(*input++); 2279 if (trans_table[c] != -1) 2280 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) 2281 continue; 2282 changed = 1; 2283 } 2284 if (!changed && PyString_CheckExact(input_obj)) { 2285 Py_DECREF(result); 2286 Py_INCREF(input_obj); 2287 return input_obj; 2288 } 2289 /* Fix the size of the resulting string */ 2290 if (inlen > 0 && _PyString_Resize(&result, output - output_start)) 2291 return NULL; 2292 return result; 2293 } 2294 2295 2296 /* find and count characters and substrings */ 2297 2298 #define findchar(target, target_len, c) \ 2299 ((char *)memchr((const void *)(target), c, target_len)) 2300 2301 /* String ops must return a string. */ 2302 /* If the object is subclass of string, create a copy */ 2303 Py_LOCAL(PyStringObject *) 2304 return_self(PyStringObject *self) 2305 { 2306 if (PyString_CheckExact(self)) { 2307 Py_INCREF(self); 2308 return self; 2309 } 2310 return (PyStringObject *)PyString_FromStringAndSize( 2311 PyString_AS_STRING(self), 2312 PyString_GET_SIZE(self)); 2313 } 2314 2315 Py_LOCAL_INLINE(Py_ssize_t) 2316 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount) 2317 { 2318 Py_ssize_t count=0; 2319 const char *start=target; 2320 const char *end=target+target_len; 2321 2322 while ( (start=findchar(start, end-start, c)) != NULL ) { 2323 count++; 2324 if (count >= maxcount) 2325 break; 2326 start += 1; 2327 } 2328 return count; 2329 } 2330 2331 2332 /* Algorithms for different cases of string replacement */ 2333 2334 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ 2335 Py_LOCAL(PyStringObject *) 2336 replace_interleave(PyStringObject *self, 2337 const char *to_s, Py_ssize_t to_len, 2338 Py_ssize_t maxcount) 2339 { 2340 char *self_s, *result_s; 2341 Py_ssize_t self_len, result_len; 2342 Py_ssize_t count, i, product; 2343 PyStringObject *result; 2344 2345 self_len = PyString_GET_SIZE(self); 2346 2347 /* 1 at the end plus 1 after every character */ 2348 count = self_len+1; 2349 if (maxcount < 
count) 2350 count = maxcount; 2351 2352 /* Check for overflow */ 2353 /* result_len = count * to_len + self_len; */ 2354 product = count * to_len; 2355 if (product / to_len != count) { 2356 PyErr_SetString(PyExc_OverflowError, 2357 "replace string is too long"); 2358 return NULL; 2359 } 2360 result_len = product + self_len; 2361 if (result_len < 0) { 2362 PyErr_SetString(PyExc_OverflowError, 2363 "replace string is too long"); 2364 return NULL; 2365 } 2366 2367 if (! (result = (PyStringObject *) 2368 PyString_FromStringAndSize(NULL, result_len)) ) 2369 return NULL; 2370 2371 self_s = PyString_AS_STRING(self); 2372 result_s = PyString_AS_STRING(result); 2373 2374 /* TODO: special case single character, which doesn't need memcpy */ 2375 2376 /* Lay the first one down (guaranteed this will occur) */ 2377 Py_MEMCPY(result_s, to_s, to_len); 2378 result_s += to_len; 2379 count -= 1; 2380 2381 for (i=0; i<count; i++) { 2382 *result_s++ = *self_s++; 2383 Py_MEMCPY(result_s, to_s, to_len); 2384 result_s += to_len; 2385 } 2386 2387 /* Copy the rest of the original string */ 2388 Py_MEMCPY(result_s, self_s, self_len-i); 2389 2390 return result; 2391 } 2392 2393 /* Special case for deleting a single character */ 2394 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */ 2395 Py_LOCAL(PyStringObject *) 2396 replace_delete_single_character(PyStringObject *self, 2397 char from_c, Py_ssize_t maxcount) 2398 { 2399 char *self_s, *result_s; 2400 char *start, *next, *end; 2401 Py_ssize_t self_len, result_len; 2402 Py_ssize_t count; 2403 PyStringObject *result; 2404 2405 self_len = PyString_GET_SIZE(self); 2406 self_s = PyString_AS_STRING(self); 2407 2408 count = countchar(self_s, self_len, from_c, maxcount); 2409 if (count == 0) { 2410 return return_self(self); 2411 } 2412 2413 result_len = self_len - count; /* from_len == 1 */ 2414 assert(result_len>=0); 2415 2416 if ( (result = (PyStringObject *) 2417 PyString_FromStringAndSize(NULL, result_len)) == NULL) 2418 return NULL; 2419 
result_s = PyString_AS_STRING(result); 2420 2421 start = self_s; 2422 end = self_s + self_len; 2423 while (count-- > 0) { 2424 next = findchar(start, end-start, from_c); 2425 if (next == NULL) 2426 break; 2427 Py_MEMCPY(result_s, start, next-start); 2428 result_s += (next-start); 2429 start = next+1; 2430 } 2431 Py_MEMCPY(result_s, start, end-start); 2432 2433 return result; 2434 } 2435 2436 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ 2437 2438 Py_LOCAL(PyStringObject *) 2439 replace_delete_substring(PyStringObject *self, 2440 const char *from_s, Py_ssize_t from_len, 2441 Py_ssize_t maxcount) { 2442 char *self_s, *result_s; 2443 char *start, *next, *end; 2444 Py_ssize_t self_len, result_len; 2445 Py_ssize_t count, offset; 2446 PyStringObject *result; 2447 2448 self_len = PyString_GET_SIZE(self); 2449 self_s = PyString_AS_STRING(self); 2450 2451 count = stringlib_count(self_s, self_len, 2452 from_s, from_len, 2453 maxcount); 2454 2455 if (count == 0) { 2456 /* no matches */ 2457 return return_self(self); 2458 } 2459 2460 result_len = self_len - (count * from_len); 2461 assert (result_len>=0); 2462 2463 if ( (result = (PyStringObject *) 2464 PyString_FromStringAndSize(NULL, result_len)) == NULL ) 2465 return NULL; 2466 2467 result_s = PyString_AS_STRING(result); 2468 2469 start = self_s; 2470 end = self_s + self_len; 2471 while (count-- > 0) { 2472 offset = stringlib_find(start, end-start, 2473 from_s, from_len, 2474 0); 2475 if (offset == -1) 2476 break; 2477 next = start + offset; 2478 2479 Py_MEMCPY(result_s, start, next-start); 2480 2481 result_s += (next-start); 2482 start = next+from_len; 2483 } 2484 Py_MEMCPY(result_s, start, end-start); 2485 return result; 2486 } 2487 2488 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ 2489 Py_LOCAL(PyStringObject *) 2490 replace_single_character_in_place(PyStringObject *self, 2491 char from_c, char to_c, 2492 Py_ssize_t maxcount) 2493 { 2494 char *self_s, *result_s, *start, *end, *next; 2495 Py_ssize_t 
self_len; 2496 PyStringObject *result; 2497 2498 /* The result string will be the same size */ 2499 self_s = PyString_AS_STRING(self); 2500 self_len = PyString_GET_SIZE(self); 2501 2502 next = findchar(self_s, self_len, from_c); 2503 2504 if (next == NULL) { 2505 /* No matches; return the original string */ 2506 return return_self(self); 2507 } 2508 2509 /* Need to make a new string */ 2510 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); 2511 if (result == NULL) 2512 return NULL; 2513 result_s = PyString_AS_STRING(result); 2514 Py_MEMCPY(result_s, self_s, self_len); 2515 2516 /* change everything in-place, starting with this one */ 2517 start = result_s + (next-self_s); 2518 *start = to_c; 2519 start++; 2520 end = result_s + self_len; 2521 2522 while (--maxcount > 0) { 2523 next = findchar(start, end-start, from_c); 2524 if (next == NULL) 2525 break; 2526 *next = to_c; 2527 start = next+1; 2528 } 2529 2530 return result; 2531 } 2532 2533 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ 2534 Py_LOCAL(PyStringObject *) 2535 replace_substring_in_place(PyStringObject *self, 2536 const char *from_s, Py_ssize_t from_len, 2537 const char *to_s, Py_ssize_t to_len, 2538 Py_ssize_t maxcount) 2539 { 2540 char *result_s, *start, *end; 2541 char *self_s; 2542 Py_ssize_t self_len, offset; 2543 PyStringObject *result; 2544 2545 /* The result string will be the same size */ 2546 2547 self_s = PyString_AS_STRING(self); 2548 self_len = PyString_GET_SIZE(self); 2549 2550 offset = stringlib_find(self_s, self_len, 2551 from_s, from_len, 2552 0); 2553 if (offset == -1) { 2554 /* No matches; return the original string */ 2555 return return_self(self); 2556 } 2557 2558 /* Need to make a new string */ 2559 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); 2560 if (result == NULL) 2561 return NULL; 2562 result_s = PyString_AS_STRING(result); 2563 Py_MEMCPY(result_s, self_s, self_len); 2564 2565 /* change everything in-place, starting with 
this one */ 2566 start = result_s + offset; 2567 Py_MEMCPY(start, to_s, from_len); 2568 start += from_len; 2569 end = result_s + self_len; 2570 2571 while ( --maxcount > 0) { 2572 offset = stringlib_find(start, end-start, 2573 from_s, from_len, 2574 0); 2575 if (offset==-1) 2576 break; 2577 Py_MEMCPY(start+offset, to_s, from_len); 2578 start += offset+from_len; 2579 } 2580 2581 return result; 2582 } 2583 2584 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ 2585 Py_LOCAL(PyStringObject *) 2586 replace_single_character(PyStringObject *self, 2587 char from_c, 2588 const char *to_s, Py_ssize_t to_len, 2589 Py_ssize_t maxcount) 2590 { 2591 char *self_s, *result_s; 2592 char *start, *next, *end; 2593 Py_ssize_t self_len, result_len; 2594 Py_ssize_t count, product; 2595 PyStringObject *result; 2596 2597 self_s = PyString_AS_STRING(self); 2598 self_len = PyString_GET_SIZE(self); 2599 2600 count = countchar(self_s, self_len, from_c, maxcount); 2601 if (count == 0) { 2602 /* no matches, return unchanged */ 2603 return return_self(self); 2604 } 2605 2606 /* use the difference between current and new, hence the "-1" */ 2607 /* result_len = self_len + count * (to_len-1) */ 2608 product = count * (to_len-1); 2609 if (product / (to_len-1) != count) { 2610 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2611 return NULL; 2612 } 2613 result_len = self_len + product; 2614 if (result_len < 0) { 2615 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2616 return NULL; 2617 } 2618 2619 if ( (result = (PyStringObject *) 2620 PyString_FromStringAndSize(NULL, result_len)) == NULL) 2621 return NULL; 2622 result_s = PyString_AS_STRING(result); 2623 2624 start = self_s; 2625 end = self_s + self_len; 2626 while (count-- > 0) { 2627 next = findchar(start, end-start, from_c); 2628 if (next == NULL) 2629 break; 2630 2631 if (next == start) { 2632 /* replace with the 'to' */ 2633 Py_MEMCPY(result_s, to_s, to_len); 2634 result_s += to_len; 2635 start 
+= 1; 2636 } else { 2637 /* copy the unchanged old then the 'to' */ 2638 Py_MEMCPY(result_s, start, next-start); 2639 result_s += (next-start); 2640 Py_MEMCPY(result_s, to_s, to_len); 2641 result_s += to_len; 2642 start = next+1; 2643 } 2644 } 2645 /* Copy the remainder of the remaining string */ 2646 Py_MEMCPY(result_s, start, end-start); 2647 2648 return result; 2649 } 2650 2651 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ 2652 Py_LOCAL(PyStringObject *) 2653 replace_substring(PyStringObject *self, 2654 const char *from_s, Py_ssize_t from_len, 2655 const char *to_s, Py_ssize_t to_len, 2656 Py_ssize_t maxcount) { 2657 char *self_s, *result_s; 2658 char *start, *next, *end; 2659 Py_ssize_t self_len, result_len; 2660 Py_ssize_t count, offset, product; 2661 PyStringObject *result; 2662 2663 self_s = PyString_AS_STRING(self); 2664 self_len = PyString_GET_SIZE(self); 2665 2666 count = stringlib_count(self_s, self_len, 2667 from_s, from_len, 2668 maxcount); 2669 2670 if (count == 0) { 2671 /* no matches, return unchanged */ 2672 return return_self(self); 2673 } 2674 2675 /* Check for overflow */ 2676 /* result_len = self_len + count * (to_len-from_len) */ 2677 product = count * (to_len-from_len); 2678 if (product / (to_len-from_len) != count) { 2679 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2680 return NULL; 2681 } 2682 result_len = self_len + product; 2683 if (result_len < 0) { 2684 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2685 return NULL; 2686 } 2687 2688 if ( (result = (PyStringObject *) 2689 PyString_FromStringAndSize(NULL, result_len)) == NULL) 2690 return NULL; 2691 result_s = PyString_AS_STRING(result); 2692 2693 start = self_s; 2694 end = self_s + self_len; 2695 while (count-- > 0) { 2696 offset = stringlib_find(start, end-start, 2697 from_s, from_len, 2698 0); 2699 if (offset == -1) 2700 break; 2701 next = start+offset; 2702 if (next == start) { 2703 /* replace with the 'to' */ 2704 
Py_MEMCPY(result_s, to_s, to_len); 2705 result_s += to_len; 2706 start += from_len; 2707 } else { 2708 /* copy the unchanged old then the 'to' */ 2709 Py_MEMCPY(result_s, start, next-start); 2710 result_s += (next-start); 2711 Py_MEMCPY(result_s, to_s, to_len); 2712 result_s += to_len; 2713 start = next+from_len; 2714 } 2715 } 2716 /* Copy the remainder of the remaining string */ 2717 Py_MEMCPY(result_s, start, end-start); 2718 2719 return result; 2720 } 2721 2722 2723 Py_LOCAL(PyStringObject *) 2724 replace(PyStringObject *self, 2725 const char *from_s, Py_ssize_t from_len, 2726 const char *to_s, Py_ssize_t to_len, 2727 Py_ssize_t maxcount) 2728 { 2729 if (maxcount < 0) { 2730 maxcount = PY_SSIZE_T_MAX; 2731 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) { 2732 /* nothing to do; return the original string */ 2733 return return_self(self); 2734 } 2735 2736 if (maxcount == 0 || 2737 (from_len == 0 && to_len == 0)) { 2738 /* nothing to do; return the original string */ 2739 return return_self(self); 2740 } 2741 2742 /* Handle zero-length special cases */ 2743 2744 if (from_len == 0) { 2745 /* insert the 'to' string everywhere. */ 2746 /* >>> "Python".replace("", ".") */ 2747 /* '.P.y.t.h.o.n.' 
*/ 2748 return replace_interleave(self, to_s, to_len, maxcount); 2749 } 2750 2751 /* Except for "".replace("", "A") == "A" there is no way beyond this */ 2752 /* point for an empty self string to generate a non-empty string */ 2753 /* Special case so the remaining code always gets a non-empty string */ 2754 if (PyString_GET_SIZE(self) == 0) { 2755 return return_self(self); 2756 } 2757 2758 if (to_len == 0) { 2759 /* delete all occurances of 'from' string */ 2760 if (from_len == 1) { 2761 return replace_delete_single_character( 2762 self, from_s[0], maxcount); 2763 } else { 2764 return replace_delete_substring(self, from_s, from_len, maxcount); 2765 } 2766 } 2767 2768 /* Handle special case where both strings have the same length */ 2769 2770 if (from_len == to_len) { 2771 if (from_len == 1) { 2772 return replace_single_character_in_place( 2773 self, 2774 from_s[0], 2775 to_s[0], 2776 maxcount); 2777 } else { 2778 return replace_substring_in_place( 2779 self, from_s, from_len, to_s, to_len, maxcount); 2780 } 2781 } 2782 2783 /* Otherwise use the more generic algorithms */ 2784 if (from_len == 1) { 2785 return replace_single_character(self, from_s[0], 2786 to_s, to_len, maxcount); 2787 } else { 2788 /* len('from')>=2, len('to')>=1 */ 2789 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); 2790 } 2791 } 2792 2793 PyDoc_STRVAR(replace__doc__, 2794 "S.replace(old, new[, count]) -> string\n\ 2795 \n\ 2796 Return a copy of string S with all occurrences of substring\n\ 2797 old replaced by new. 
If the optional argument count is\n\ 2798 given, only the first count occurrences are replaced."); 2799 2800 static PyObject * 2801 string_replace(PyStringObject *self, PyObject *args) 2802 { 2803 Py_ssize_t count = -1; 2804 PyObject *from, *to; 2805 const char *from_s, *to_s; 2806 Py_ssize_t from_len, to_len; 2807 2808 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) 2809 return NULL; 2810 2811 if (PyString_Check(from)) { 2812 from_s = PyString_AS_STRING(from); 2813 from_len = PyString_GET_SIZE(from); 2814 } 2815 #ifdef Py_USING_UNICODE 2816 if (PyUnicode_Check(from)) 2817 return PyUnicode_Replace((PyObject *)self, 2818 from, to, count); 2819 #endif 2820 else if (PyObject_AsCharBuffer(from, &from_s, &from_len)) 2821 return NULL; 2822 2823 if (PyString_Check(to)) { 2824 to_s = PyString_AS_STRING(to); 2825 to_len = PyString_GET_SIZE(to); 2826 } 2827 #ifdef Py_USING_UNICODE 2828 else if (PyUnicode_Check(to)) 2829 return PyUnicode_Replace((PyObject *)self, 2830 from, to, count); 2831 #endif 2832 else if (PyObject_AsCharBuffer(to, &to_s, &to_len)) 2833 return NULL; 2834 2835 return (PyObject *)replace((PyStringObject *) self, 2836 from_s, from_len, 2837 to_s, to_len, count); 2838 } 2839 2840 /** End DALKE **/ 2841 2842 /* Matches the end (direction >= 0) or start (direction < 0) of self 2843 * against substr, using the start and end arguments. Returns 2844 * -1 on error, 0 if not found and 1 if found. 
2845 */ 2846 Py_LOCAL(int) 2847 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start, 2848 Py_ssize_t end, int direction) 2849 { 2850 Py_ssize_t len = PyString_GET_SIZE(self); 2851 Py_ssize_t slen; 2852 const char* sub; 2853 const char* str; 2854 2855 if (PyString_Check(substr)) { 2856 sub = PyString_AS_STRING(substr); 2857 slen = PyString_GET_SIZE(substr); 2858 } 2859 #ifdef Py_USING_UNICODE 2860 else if (PyUnicode_Check(substr)) 2861 return PyUnicode_Tailmatch((PyObject *)self, 2862 substr, start, end, direction); 2863 #endif 2864 else if (PyObject_AsCharBuffer(substr, &sub, &slen)) 2865 return -1; 2866 str = PyString_AS_STRING(self); 2867 2868 ADJUST_INDICES(start, end, len); 2869 2870 if (direction < 0) { 2871 /* startswith */ 2872 if (start+slen > len) 2873 return 0; 2874 } else { 2875 /* endswith */ 2876 if (end-start < slen || start > len) 2877 return 0; 2878 2879 if (end-slen > start) 2880 start = end - slen; 2881 } 2882 if (end-start >= slen) 2883 return ! 
memcmp(str+start, sub, slen); 2884 return 0; 2885 } 2886 2887 2888 PyDoc_STRVAR(startswith__doc__, 2889 "S.startswith(prefix[, start[, end]]) -> bool\n\ 2890 \n\ 2891 Return True if S starts with the specified prefix, False otherwise.\n\ 2892 With optional start, test S beginning at that position.\n\ 2893 With optional end, stop comparing S at that position.\n\ 2894 prefix can also be a tuple of strings to try."); 2895 2896 static PyObject * 2897 string_startswith(PyStringObject *self, PyObject *args) 2898 { 2899 Py_ssize_t start = 0; 2900 Py_ssize_t end = PY_SSIZE_T_MAX; 2901 PyObject *subobj; 2902 int result; 2903 2904 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) 2905 return NULL; 2906 if (PyTuple_Check(subobj)) { 2907 Py_ssize_t i; 2908 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 2909 result = _string_tailmatch(self, 2910 PyTuple_GET_ITEM(subobj, i), 2911 start, end, -1); 2912 if (result == -1) 2913 return NULL; 2914 else if (result) { 2915 Py_RETURN_TRUE; 2916 } 2917 } 2918 Py_RETURN_FALSE; 2919 } 2920 result = _string_tailmatch(self, subobj, start, end, -1); 2921 if (result == -1) { 2922 if (PyErr_ExceptionMatches(PyExc_TypeError)) 2923 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, " 2924 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); 2925 return NULL; 2926 } 2927 else 2928 return PyBool_FromLong(result); 2929 } 2930 2931 2932 PyDoc_STRVAR(endswith__doc__, 2933 "S.endswith(suffix[, start[, end]]) -> bool\n\ 2934 \n\ 2935 Return True if S ends with the specified suffix, False otherwise.\n\ 2936 With optional start, test S beginning at that position.\n\ 2937 With optional end, stop comparing S at that position.\n\ 2938 suffix can also be a tuple of strings to try."); 2939 2940 static PyObject * 2941 string_endswith(PyStringObject *self, PyObject *args) 2942 { 2943 Py_ssize_t start = 0; 2944 Py_ssize_t end = PY_SSIZE_T_MAX; 2945 PyObject *subobj; 2946 int result; 2947 2948 if 
(!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) 2949 return NULL; 2950 if (PyTuple_Check(subobj)) { 2951 Py_ssize_t i; 2952 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 2953 result = _string_tailmatch(self, 2954 PyTuple_GET_ITEM(subobj, i), 2955 start, end, +1); 2956 if (result == -1) 2957 return NULL; 2958 else if (result) { 2959 Py_RETURN_TRUE; 2960 } 2961 } 2962 Py_RETURN_FALSE; 2963 } 2964 result = _string_tailmatch(self, subobj, start, end, +1); 2965 if (result == -1) { 2966 if (PyErr_ExceptionMatches(PyExc_TypeError)) 2967 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, " 2968 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); 2969 return NULL; 2970 } 2971 else 2972 return PyBool_FromLong(result); 2973 } 2974 2975 2976 PyDoc_STRVAR(encode__doc__, 2977 "S.encode([encoding[,errors]]) -> object\n\ 2978 \n\ 2979 Encodes S using the codec registered for encoding. encoding defaults\n\ 2980 to the default encoding. errors may be given to set a different error\n\ 2981 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 2982 a UnicodeEncodeError. 
Other possible values are 'ignore', 'replace' and\n\ 2983 'xmlcharrefreplace' as well as any other name registered with\n\ 2984 codecs.register_error that is able to handle UnicodeEncodeErrors."); 2985 2986 static PyObject * 2987 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs) 2988 { 2989 static char *kwlist[] = {"encoding", "errors", 0}; 2990 char *encoding = NULL; 2991 char *errors = NULL; 2992 PyObject *v; 2993 2994 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", 2995 kwlist, &encoding, &errors)) 2996 return NULL; 2997 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); 2998 if (v == NULL) 2999 goto onError; 3000 if (!PyString_Check(v) && !PyUnicode_Check(v)) { 3001 PyErr_Format(PyExc_TypeError, 3002 "encoder did not return a string/unicode object " 3003 "(type=%.400s)", 3004 Py_TYPE(v)->tp_name); 3005 Py_DECREF(v); 3006 return NULL; 3007 } 3008 return v; 3009 3010 onError: 3011 return NULL; 3012 } 3013 3014 3015 PyDoc_STRVAR(decode__doc__, 3016 "S.decode([encoding[,errors]]) -> object\n\ 3017 \n\ 3018 Decodes S using the codec registered for encoding. encoding defaults\n\ 3019 to the default encoding. errors may be given to set a different error\n\ 3020 handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 3021 a UnicodeDecodeError. 
Other possible values are 'ignore' and 'replace'\n\ 3022 as well as any other name registered with codecs.register_error that is\n\ 3023 able to handle UnicodeDecodeErrors."); 3024 3025 static PyObject * 3026 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs) 3027 { 3028 static char *kwlist[] = {"encoding", "errors", 0}; 3029 char *encoding = NULL; 3030 char *errors = NULL; 3031 PyObject *v; 3032 3033 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", 3034 kwlist, &encoding, &errors)) 3035 return NULL; 3036 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); 3037 if (v == NULL) 3038 goto onError; 3039 if (!PyString_Check(v) && !PyUnicode_Check(v)) { 3040 PyErr_Format(PyExc_TypeError, 3041 "decoder did not return a string/unicode object " 3042 "(type=%.400s)", 3043 Py_TYPE(v)->tp_name); 3044 Py_DECREF(v); 3045 return NULL; 3046 } 3047 return v; 3048 3049 onError: 3050 return NULL; 3051 } 3052 3053 3054 PyDoc_STRVAR(expandtabs__doc__, 3055 "S.expandtabs([tabsize]) -> string\n\ 3056 \n\ 3057 Return a copy of S where all tab characters are expanded using spaces.\n\ 3058 If tabsize is not given, a tab size of 8 characters is assumed."); 3059 3060 static PyObject* 3061 string_expandtabs(PyStringObject *self, PyObject *args) 3062 { 3063 const char *e, *p, *qe; 3064 char *q; 3065 Py_ssize_t i, j, incr; 3066 PyObject *u; 3067 int tabsize = 8; 3068 3069 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) 3070 return NULL; 3071 3072 /* First pass: determine size of output string */ 3073 i = 0; /* chars up to and including most recent \n or \r */ 3074 j = 0; /* chars since most recent \n or \r (use in tab calculations) */ 3075 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */ 3076 for (p = PyString_AS_STRING(self); p < e; p++) 3077 if (*p == '\t') { 3078 if (tabsize > 0) { 3079 incr = tabsize - (j % tabsize); 3080 if (j > PY_SSIZE_T_MAX - incr) 3081 goto overflow1; 3082 j += incr; 3083 } 3084 } 3085 else { 
3086 if (j > PY_SSIZE_T_MAX - 1) 3087 goto overflow1; 3088 j++; 3089 if (*p == '\n' || *p == '\r') { 3090 if (i > PY_SSIZE_T_MAX - j) 3091 goto overflow1; 3092 i += j; 3093 j = 0; 3094 } 3095 } 3096 3097 if (i > PY_SSIZE_T_MAX - j) 3098 goto overflow1; 3099 3100 /* Second pass: create output string and fill it */ 3101 u = PyString_FromStringAndSize(NULL, i + j); 3102 if (!u) 3103 return NULL; 3104 3105 j = 0; /* same as in first pass */ 3106 q = PyString_AS_STRING(u); /* next output char */ 3107 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */ 3108 3109 for (p = PyString_AS_STRING(self); p < e; p++) 3110 if (*p == '\t') { 3111 if (tabsize > 0) { 3112 i = tabsize - (j % tabsize); 3113 j += i; 3114 while (i--) { 3115 if (q >= qe) 3116 goto overflow2; 3117 *q++ = ' '; 3118 } 3119 } 3120 } 3121 else { 3122 if (q >= qe) 3123 goto overflow2; 3124 *q++ = *p; 3125 j++; 3126 if (*p == '\n' || *p == '\r') 3127 j = 0; 3128 } 3129 3130 return u; 3131 3132 overflow2: 3133 Py_DECREF(u); 3134 overflow1: 3135 PyErr_SetString(PyExc_OverflowError, "new string is too long"); 3136 return NULL; 3137 } 3138 3139 Py_LOCAL_INLINE(PyObject *) 3140 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill) 3141 { 3142 PyObject *u; 3143 3144 if (left < 0) 3145 left = 0; 3146 if (right < 0) 3147 right = 0; 3148 3149 if (left == 0 && right == 0 && PyString_CheckExact(self)) { 3150 Py_INCREF(self); 3151 return (PyObject *)self; 3152 } 3153 3154 u = PyString_FromStringAndSize(NULL, 3155 left + PyString_GET_SIZE(self) + right); 3156 if (u) { 3157 if (left) 3158 memset(PyString_AS_STRING(u), fill, left); 3159 Py_MEMCPY(PyString_AS_STRING(u) + left, 3160 PyString_AS_STRING(self), 3161 PyString_GET_SIZE(self)); 3162 if (right) 3163 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), 3164 fill, right); 3165 } 3166 3167 return u; 3168 } 3169 3170 PyDoc_STRVAR(ljust__doc__, 3171 "S.ljust(width[, fillchar]) -> string\n" 3172 "\n" 3173 "Return S 
left-justified in a string of length width. Padding is\n" 3174 "done using the specified fill character (default is a space)."); 3175 3176 static PyObject * 3177 string_ljust(PyStringObject *self, PyObject *args) 3178 { 3179 Py_ssize_t width; 3180 char fillchar = ' '; 3181 3182 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) 3183 return NULL; 3184 3185 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { 3186 Py_INCREF(self); 3187 return (PyObject*) self; 3188 } 3189 3190 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar); 3191 } 3192 3193 3194 PyDoc_STRVAR(rjust__doc__, 3195 "S.rjust(width[, fillchar]) -> string\n" 3196 "\n" 3197 "Return S right-justified in a string of length width. Padding is\n" 3198 "done using the specified fill character (default is a space)"); 3199 3200 static PyObject * 3201 string_rjust(PyStringObject *self, PyObject *args) 3202 { 3203 Py_ssize_t width; 3204 char fillchar = ' '; 3205 3206 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) 3207 return NULL; 3208 3209 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { 3210 Py_INCREF(self); 3211 return (PyObject*) self; 3212 } 3213 3214 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar); 3215 } 3216 3217 3218 PyDoc_STRVAR(center__doc__, 3219 "S.center(width[, fillchar]) -> string\n" 3220 "\n" 3221 "Return S centered in a string of length width. 
Padding is\n" 3222 "done using the specified fill character (default is a space)"); 3223 3224 static PyObject * 3225 string_center(PyStringObject *self, PyObject *args) 3226 { 3227 Py_ssize_t marg, left; 3228 Py_ssize_t width; 3229 char fillchar = ' '; 3230 3231 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) 3232 return NULL; 3233 3234 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { 3235 Py_INCREF(self); 3236 return (PyObject*) self; 3237 } 3238 3239 marg = width - PyString_GET_SIZE(self); 3240 left = marg / 2 + (marg & width & 1); 3241 3242 return pad(self, left, marg - left, fillchar); 3243 } 3244 3245 PyDoc_STRVAR(zfill__doc__, 3246 "S.zfill(width) -> string\n" 3247 "\n" 3248 "Pad a numeric string S with zeros on the left, to fill a field\n" 3249 "of the specified width. The string S is never truncated."); 3250 3251 static PyObject * 3252 string_zfill(PyStringObject *self, PyObject *args) 3253 { 3254 Py_ssize_t fill; 3255 PyObject *s; 3256 char *p; 3257 Py_ssize_t width; 3258 3259 if (!PyArg_ParseTuple(args, "n:zfill", &width)) 3260 return NULL; 3261 3262 if (PyString_GET_SIZE(self) >= width) { 3263 if (PyString_CheckExact(self)) { 3264 Py_INCREF(self); 3265 return (PyObject*) self; 3266 } 3267 else 3268 return PyString_FromStringAndSize( 3269 PyString_AS_STRING(self), 3270 PyString_GET_SIZE(self) 3271 ); 3272 } 3273 3274 fill = width - PyString_GET_SIZE(self); 3275 3276 s = pad(self, fill, 0, '0'); 3277 3278 if (s == NULL) 3279 return NULL; 3280 3281 p = PyString_AS_STRING(s); 3282 if (p[fill] == '+' || p[fill] == '-') { 3283 /* move sign to beginning of string */ 3284 p[0] = p[fill]; 3285 p[fill] = '0'; 3286 } 3287 3288 return (PyObject*) s; 3289 } 3290 3291 PyDoc_STRVAR(isspace__doc__, 3292 "S.isspace() -> bool\n\ 3293 \n\ 3294 Return True if all characters in S are whitespace\n\ 3295 and there is at least one character in S, False otherwise."); 3296 3297 static PyObject* 3298 string_isspace(PyStringObject *self) 3299 { 
3300 register const unsigned char *p 3301 = (unsigned char *) PyString_AS_STRING(self); 3302 register const unsigned char *e; 3303 3304 /* Shortcut for single character strings */ 3305 if (PyString_GET_SIZE(self) == 1 && 3306 isspace(*p)) 3307 return PyBool_FromLong(1); 3308 3309 /* Special case for empty strings */ 3310 if (PyString_GET_SIZE(self) == 0) 3311 return PyBool_FromLong(0); 3312 3313 e = p + PyString_GET_SIZE(self); 3314 for (; p < e; p++) { 3315 if (!isspace(*p)) 3316 return PyBool_FromLong(0); 3317 } 3318 return PyBool_FromLong(1); 3319 } 3320 3321 3322 PyDoc_STRVAR(isalpha__doc__, 3323 "S.isalpha() -> bool\n\ 3324 \n\ 3325 Return True if all characters in S are alphabetic\n\ 3326 and there is at least one character in S, False otherwise."); 3327 3328 static PyObject* 3329 string_isalpha(PyStringObject *self) 3330 { 3331 register const unsigned char *p 3332 = (unsigned char *) PyString_AS_STRING(self); 3333 register const unsigned char *e; 3334 3335 /* Shortcut for single character strings */ 3336 if (PyString_GET_SIZE(self) == 1 && 3337 isalpha(*p)) 3338 return PyBool_FromLong(1); 3339 3340 /* Special case for empty strings */ 3341 if (PyString_GET_SIZE(self) == 0) 3342 return PyBool_FromLong(0); 3343 3344 e = p + PyString_GET_SIZE(self); 3345 for (; p < e; p++) { 3346 if (!isalpha(*p)) 3347 return PyBool_FromLong(0); 3348 } 3349 return PyBool_FromLong(1); 3350 } 3351 3352 3353 PyDoc_STRVAR(isalnum__doc__, 3354 "S.isalnum() -> bool\n\ 3355 \n\ 3356 Return True if all characters in S are alphanumeric\n\ 3357 and there is at least one character in S, False otherwise."); 3358 3359 static PyObject* 3360 string_isalnum(PyStringObject *self) 3361 { 3362 register const unsigned char *p 3363 = (unsigned char *) PyString_AS_STRING(self); 3364 register const unsigned char *e; 3365 3366 /* Shortcut for single character strings */ 3367 if (PyString_GET_SIZE(self) == 1 && 3368 isalnum(*p)) 3369 return PyBool_FromLong(1); 3370 3371 /* Special case for empty 
strings */ 3372 if (PyString_GET_SIZE(self) == 0) 3373 return PyBool_FromLong(0); 3374 3375 e = p + PyString_GET_SIZE(self); 3376 for (; p < e; p++) { 3377 if (!isalnum(*p)) 3378 return PyBool_FromLong(0); 3379 } 3380 return PyBool_FromLong(1); 3381 } 3382 3383 3384 PyDoc_STRVAR(isdigit__doc__, 3385 "S.isdigit() -> bool\n\ 3386 \n\ 3387 Return True if all characters in S are digits\n\ 3388 and there is at least one character in S, False otherwise."); 3389 3390 static PyObject* 3391 string_isdigit(PyStringObject *self) 3392 { 3393 register const unsigned char *p 3394 = (unsigned char *) PyString_AS_STRING(self); 3395 register const unsigned char *e; 3396 3397 /* Shortcut for single character strings */ 3398 if (PyString_GET_SIZE(self) == 1 && 3399 isdigit(*p)) 3400 return PyBool_FromLong(1); 3401 3402 /* Special case for empty strings */ 3403 if (PyString_GET_SIZE(self) == 0) 3404 return PyBool_FromLong(0); 3405 3406 e = p + PyString_GET_SIZE(self); 3407 for (; p < e; p++) { 3408 if (!isdigit(*p)) 3409 return PyBool_FromLong(0); 3410 } 3411 return PyBool_FromLong(1); 3412 } 3413 3414 3415 PyDoc_STRVAR(islower__doc__, 3416 "S.islower() -> bool\n\ 3417 \n\ 3418 Return True if all cased characters in S are lowercase and there is\n\ 3419 at least one cased character in S, False otherwise."); 3420 3421 static PyObject* 3422 string_islower(PyStringObject *self) 3423 { 3424 register const unsigned char *p 3425 = (unsigned char *) PyString_AS_STRING(self); 3426 register const unsigned char *e; 3427 int cased; 3428 3429 /* Shortcut for single character strings */ 3430 if (PyString_GET_SIZE(self) == 1) 3431 return PyBool_FromLong(islower(*p) != 0); 3432 3433 /* Special case for empty strings */ 3434 if (PyString_GET_SIZE(self) == 0) 3435 return PyBool_FromLong(0); 3436 3437 e = p + PyString_GET_SIZE(self); 3438 cased = 0; 3439 for (; p < e; p++) { 3440 if (isupper(*p)) 3441 return PyBool_FromLong(0); 3442 else if (!cased && islower(*p)) 3443 cased = 1; 3444 } 3445 return 
PyBool_FromLong(cased); 3446 } 3447 3448 3449 PyDoc_STRVAR(isupper__doc__, 3450 "S.isupper() -> bool\n\ 3451 \n\ 3452 Return True if all cased characters in S are uppercase and there is\n\ 3453 at least one cased character in S, False otherwise."); 3454 3455 static PyObject* 3456 string_isupper(PyStringObject *self) 3457 { 3458 register const unsigned char *p 3459 = (unsigned char *) PyString_AS_STRING(self); 3460 register const unsigned char *e; 3461 int cased; 3462 3463 /* Shortcut for single character strings */ 3464 if (PyString_GET_SIZE(self) == 1) 3465 return PyBool_FromLong(isupper(*p) != 0); 3466 3467 /* Special case for empty strings */ 3468 if (PyString_GET_SIZE(self) == 0) 3469 return PyBool_FromLong(0); 3470 3471 e = p + PyString_GET_SIZE(self); 3472 cased = 0; 3473 for (; p < e; p++) { 3474 if (islower(*p)) 3475 return PyBool_FromLong(0); 3476 else if (!cased && isupper(*p)) 3477 cased = 1; 3478 } 3479 return PyBool_FromLong(cased); 3480 } 3481 3482 3483 PyDoc_STRVAR(istitle__doc__, 3484 "S.istitle() -> bool\n\ 3485 \n\ 3486 Return True if S is a titlecased string and there is at least one\n\ 3487 character in S, i.e. uppercase characters may only follow uncased\n\ 3488 characters and lowercase characters only cased ones. 
Return False\n\ 3489 otherwise."); 3490 3491 static PyObject* 3492 string_istitle(PyStringObject *self, PyObject *uncased) 3493 { 3494 register const unsigned char *p 3495 = (unsigned char *) PyString_AS_STRING(self); 3496 register const unsigned char *e; 3497 int cased, previous_is_cased; 3498 3499 /* Shortcut for single character strings */ 3500 if (PyString_GET_SIZE(self) == 1) 3501 return PyBool_FromLong(isupper(*p) != 0); 3502 3503 /* Special case for empty strings */ 3504 if (PyString_GET_SIZE(self) == 0) 3505 return PyBool_FromLong(0); 3506 3507 e = p + PyString_GET_SIZE(self); 3508 cased = 0; 3509 previous_is_cased = 0; 3510 for (; p < e; p++) { 3511 register const unsigned char ch = *p; 3512 3513 if (isupper(ch)) { 3514 if (previous_is_cased) 3515 return PyBool_FromLong(0); 3516 previous_is_cased = 1; 3517 cased = 1; 3518 } 3519 else if (islower(ch)) { 3520 if (!previous_is_cased) 3521 return PyBool_FromLong(0); 3522 previous_is_cased = 1; 3523 cased = 1; 3524 } 3525 else 3526 previous_is_cased = 0; 3527 } 3528 return PyBool_FromLong(cased); 3529 } 3530 3531 3532 PyDoc_STRVAR(splitlines__doc__, 3533 "S.splitlines([keepends]) -> list of strings\n\ 3534 \n\ 3535 Return a list of the lines in S, breaking at line boundaries.\n\ 3536 Line breaks are not included in the resulting list unless keepends\n\ 3537 is given and true."); 3538 3539 static PyObject* 3540 string_splitlines(PyStringObject *self, PyObject *args) 3541 { 3542 int keepends = 0; 3543 3544 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) 3545 return NULL; 3546 3547 return stringlib_splitlines( 3548 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self), 3549 keepends 3550 ); 3551 } 3552 3553 PyDoc_STRVAR(sizeof__doc__, 3554 "S.__sizeof__() -> size of S in memory, in bytes"); 3555 3556 static PyObject * 3557 string_sizeof(PyStringObject *v) 3558 { 3559 Py_ssize_t res; 3560 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize; 3561 return 
PyInt_FromSsize_t(res); 3562 } 3563 3564 static PyObject * 3565 string_getnewargs(PyStringObject *v) 3566 { 3567 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v)); 3568 } 3569 3570 3571 #include "stringlib/string_format.h" 3572 3573 PyDoc_STRVAR(format__doc__, 3574 "S.format(*args, **kwargs) -> string\n\ 3575 \n\ 3576 Return a formatted version of S, using substitutions from args and kwargs.\n\ 3577 The substitutions are identified by braces ('{' and '}')."); 3578 3579 static PyObject * 3580 string__format__(PyObject* self, PyObject* args) 3581 { 3582 PyObject *format_spec; 3583 PyObject *result = NULL; 3584 PyObject *tmp = NULL; 3585 3586 /* If 2.x, convert format_spec to the same type as value */ 3587 /* This is to allow things like u''.format('') */ 3588 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) 3589 goto done; 3590 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) { 3591 PyErr_Format(PyExc_TypeError, "__format__ arg must be str " 3592 "or unicode, not %s", Py_TYPE(format_spec)->tp_name); 3593 goto done; 3594 } 3595 tmp = PyObject_Str(format_spec); 3596 if (tmp == NULL) 3597 goto done; 3598 format_spec = tmp; 3599 3600 result = _PyBytes_FormatAdvanced(self, 3601 PyString_AS_STRING(format_spec), 3602 PyString_GET_SIZE(format_spec)); 3603 done: 3604 Py_XDECREF(tmp); 3605 return result; 3606 } 3607 3608 PyDoc_STRVAR(p_format__doc__, 3609 "S.__format__(format_spec) -> string\n\ 3610 \n\ 3611 Return a formatted version of S as described by format_spec."); 3612 3613 3614 static PyMethodDef 3615 string_methods[] = { 3616 /* Counterparts of the obsolete stropmodule functions; except 3617 string.maketrans(). 
*/ 3618 {"join", (PyCFunction)string_join, METH_O, join__doc__}, 3619 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__}, 3620 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__}, 3621 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__}, 3622 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__}, 3623 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__}, 3624 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__}, 3625 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__}, 3626 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__}, 3627 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__}, 3628 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__}, 3629 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__}, 3630 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, 3631 capitalize__doc__}, 3632 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, 3633 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, 3634 endswith__doc__}, 3635 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__}, 3636 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__}, 3637 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__}, 3638 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, 3639 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__}, 3640 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__}, 3641 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__}, 3642 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__}, 3643 {"rpartition", (PyCFunction)string_rpartition, METH_O, 3644 rpartition__doc__}, 3645 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, 3646 startswith__doc__}, 3647 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__}, 3648 {"swapcase", 
(PyCFunction)string_swapcase, METH_NOARGS, 3649 swapcase__doc__}, 3650 {"translate", (PyCFunction)string_translate, METH_VARARGS, 3651 translate__doc__}, 3652 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__}, 3653 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__}, 3654 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__}, 3655 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__}, 3656 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__}, 3657 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, 3658 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, 3659 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, 3660 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, 3661 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, 3662 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, 3663 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, 3664 expandtabs__doc__}, 3665 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, 3666 splitlines__doc__}, 3667 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS, 3668 sizeof__doc__}, 3669 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS}, 3670 {NULL, NULL} /* sentinel */ 3671 }; 3672 3673 static PyObject * 3674 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); 3675 3676 static PyObject * 3677 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3678 { 3679 PyObject *x = NULL; 3680 static char *kwlist[] = {"object", 0}; 3681 3682 if (type != &PyString_Type) 3683 return str_subtype_new(type, args, kwds); 3684 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x)) 3685 return NULL; 3686 if (x == NULL) 3687 return PyString_FromString(""); 3688 return PyObject_Str(x); 3689 } 3690 3691 static PyObject * 3692 
str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3693 { 3694 PyObject *tmp, *pnew; 3695 Py_ssize_t n; 3696 3697 assert(PyType_IsSubtype(type, &PyString_Type)); 3698 tmp = string_new(&PyString_Type, args, kwds); 3699 if (tmp == NULL) 3700 return NULL; 3701 assert(PyString_CheckExact(tmp)); 3702 n = PyString_GET_SIZE(tmp); 3703 pnew = type->tp_alloc(type, n); 3704 if (pnew != NULL) { 3705 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); 3706 ((PyStringObject *)pnew)->ob_shash = 3707 ((PyStringObject *)tmp)->ob_shash; 3708 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; 3709 } 3710 Py_DECREF(tmp); 3711 return pnew; 3712 } 3713 3714 static PyObject * 3715 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3716 { 3717 PyErr_SetString(PyExc_TypeError, 3718 "The basestring type cannot be instantiated"); 3719 return NULL; 3720 } 3721 3722 static PyObject * 3723 string_mod(PyObject *v, PyObject *w) 3724 { 3725 if (!PyString_Check(v)) { 3726 Py_INCREF(Py_NotImplemented); 3727 return Py_NotImplemented; 3728 } 3729 return PyString_Format(v, w); 3730 } 3731 3732 PyDoc_STRVAR(basestring_doc, 3733 "Type basestring cannot be instantiated; it is the base for str and unicode."); 3734 3735 static PyNumberMethods string_as_number = { 3736 0, /*nb_add*/ 3737 0, /*nb_subtract*/ 3738 0, /*nb_multiply*/ 3739 0, /*nb_divide*/ 3740 string_mod, /*nb_remainder*/ 3741 }; 3742 3743 3744 PyTypeObject PyBaseString_Type = { 3745 PyVarObject_HEAD_INIT(&PyType_Type, 0) 3746 "basestring", 3747 0, 3748 0, 3749 0, /* tp_dealloc */ 3750 0, /* tp_print */ 3751 0, /* tp_getattr */ 3752 0, /* tp_setattr */ 3753 0, /* tp_compare */ 3754 0, /* tp_repr */ 3755 0, /* tp_as_number */ 3756 0, /* tp_as_sequence */ 3757 0, /* tp_as_mapping */ 3758 0, /* tp_hash */ 3759 0, /* tp_call */ 3760 0, /* tp_str */ 3761 0, /* tp_getattro */ 3762 0, /* tp_setattro */ 3763 0, /* tp_as_buffer */ 3764 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ 3765 
basestring_doc, /* tp_doc */ 3766 0, /* tp_traverse */ 3767 0, /* tp_clear */ 3768 0, /* tp_richcompare */ 3769 0, /* tp_weaklistoffset */ 3770 0, /* tp_iter */ 3771 0, /* tp_iternext */ 3772 0, /* tp_methods */ 3773 0, /* tp_members */ 3774 0, /* tp_getset */ 3775 &PyBaseObject_Type, /* tp_base */ 3776 0, /* tp_dict */ 3777 0, /* tp_descr_get */ 3778 0, /* tp_descr_set */ 3779 0, /* tp_dictoffset */ 3780 0, /* tp_init */ 3781 0, /* tp_alloc */ 3782 basestring_new, /* tp_new */ 3783 0, /* tp_free */ 3784 }; 3785 3786 PyDoc_STRVAR(string_doc, 3787 "str(object) -> string\n\ 3788 \n\ 3789 Return a nice string representation of the object.\n\ 3790 If the argument is a string, the return value is the same object."); 3791 3792 PyTypeObject PyString_Type = { 3793 PyVarObject_HEAD_INIT(&PyType_Type, 0) 3794 "str", 3795 PyStringObject_SIZE, 3796 sizeof(char), 3797 string_dealloc, /* tp_dealloc */ 3798 (printfunc)string_print, /* tp_print */ 3799 0, /* tp_getattr */ 3800 0, /* tp_setattr */ 3801 0, /* tp_compare */ 3802 string_repr, /* tp_repr */ 3803 &string_as_number, /* tp_as_number */ 3804 &string_as_sequence, /* tp_as_sequence */ 3805 &string_as_mapping, /* tp_as_mapping */ 3806 (hashfunc)string_hash, /* tp_hash */ 3807 0, /* tp_call */ 3808 string_str, /* tp_str */ 3809 PyObject_GenericGetAttr, /* tp_getattro */ 3810 0, /* tp_setattro */ 3811 &string_as_buffer, /* tp_as_buffer */ 3812 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | 3813 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS | 3814 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */ 3815 string_doc, /* tp_doc */ 3816 0, /* tp_traverse */ 3817 0, /* tp_clear */ 3818 (richcmpfunc)string_richcompare, /* tp_richcompare */ 3819 0, /* tp_weaklistoffset */ 3820 0, /* tp_iter */ 3821 0, /* tp_iternext */ 3822 string_methods, /* tp_methods */ 3823 0, /* tp_members */ 3824 0, /* tp_getset */ 3825 &PyBaseString_Type, /* tp_base */ 3826 0, /* tp_dict */ 3827 0, /* tp_descr_get */ 3828 0, /* tp_descr_set */ 3829 0, /* tp_dictoffset */ 
3830 0, /* tp_init */ 3831 0, /* tp_alloc */ 3832 string_new, /* tp_new */ 3833 PyObject_Del, /* tp_free */ 3834 }; 3835 3836 void 3837 PyString_Concat(register PyObject **pv, register PyObject *w) 3838 { 3839 register PyObject *v; 3840 if (*pv == NULL) 3841 return; 3842 if (w == NULL || !PyString_Check(*pv)) { 3843 Py_DECREF(*pv); 3844 *pv = NULL; 3845 return; 3846 } 3847 v = string_concat((PyStringObject *) *pv, w); 3848 Py_DECREF(*pv); 3849 *pv = v; 3850 } 3851 3852 void 3853 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w) 3854 { 3855 PyString_Concat(pv, w); 3856 Py_XDECREF(w); 3857 } 3858 3859 3860 /* The following function breaks the notion that strings are immutable: 3861 it changes the size of a string. We get away with this only if there 3862 is only one module referencing the object. You can also think of it 3863 as creating a new string object and destroying the old one, only 3864 more efficiently. In any case, don't use this if the string may 3865 already be known to some other part of the code... 3866 Note that if there's not enough memory to resize the string, the original 3867 string object at *pv is deallocated, *pv is set to NULL, an "out of 3868 memory" exception is set, and -1 is returned. Else (on success) 0 is 3869 returned, and the value in *pv may or may not be the same as on input. 3870 As always, an extra byte is allocated for a trailing \0 byte (newsize 3871 does *not* include that), and a trailing \0 byte is stored. 
3872 */ 3873 3874 int 3875 _PyString_Resize(PyObject **pv, Py_ssize_t newsize) 3876 { 3877 register PyObject *v; 3878 register PyStringObject *sv; 3879 v = *pv; 3880 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 || 3881 PyString_CHECK_INTERNED(v)) { 3882 *pv = 0; 3883 Py_DECREF(v); 3884 PyErr_BadInternalCall(); 3885 return -1; 3886 } 3887 /* XXX UNREF/NEWREF interface should be more symmetrical */ 3888 _Py_DEC_REFTOTAL; 3889 _Py_ForgetReference(v); 3890 *pv = (PyObject *) 3891 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize); 3892 if (*pv == NULL) { 3893 PyObject_Del(v); 3894 PyErr_NoMemory(); 3895 return -1; 3896 } 3897 _Py_NewReference(*pv); 3898 sv = (PyStringObject *) *pv; 3899 Py_SIZE(sv) = newsize; 3900 sv->ob_sval[newsize] = '\0'; 3901 sv->ob_shash = -1; /* invalidate cached hash value */ 3902 return 0; 3903 } 3904 3905 /* Helpers for formatstring */ 3906 3907 Py_LOCAL_INLINE(PyObject *) 3908 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) 3909 { 3910 Py_ssize_t argidx = *p_argidx; 3911 if (argidx < arglen) { 3912 (*p_argidx)++; 3913 if (arglen < 0) 3914 return args; 3915 else 3916 return PyTuple_GetItem(args, argidx); 3917 } 3918 PyErr_SetString(PyExc_TypeError, 3919 "not enough arguments for format string"); 3920 return NULL; 3921 } 3922 3923 /* Format codes 3924 * F_LJUST '-' 3925 * F_SIGN '+' 3926 * F_BLANK ' ' 3927 * F_ALT '#' 3928 * F_ZERO '0' 3929 */ 3930 #define F_LJUST (1<<0) 3931 #define F_SIGN (1<<1) 3932 #define F_BLANK (1<<2) 3933 #define F_ALT (1<<3) 3934 #define F_ZERO (1<<4) 3935 3936 /* Returns a new reference to a PyString object, or NULL on failure. 
*/

static PyObject *
formatfloat(PyObject *v, int flags, int prec, int type)
{
    double value;
    char *text;
    PyObject *str;
    int dtsf_flags = 0;

    value = PyFloat_AsDouble(v);
    if (value == -1.0 && PyErr_Occurred()) {
        /* Replace whatever the conversion raised with a TypeError. */
        PyErr_Format(PyExc_TypeError, "float argument required, "
                     "not %.200s", Py_TYPE(v)->tp_name);
        return NULL;
    }

    /* A negative precision means "not given"; default to 6 digits. */
    if (prec < 0)
        prec = 6;
    if (flags & F_ALT)
        dtsf_flags = Py_DTSF_ALT;

    text = PyOS_double_to_string(value, type, prec, dtsf_flags, NULL);
    if (text == NULL)
        return NULL;

    str = PyString_FromStringAndSize(text, strlen(text));
    PyMem_Free(text);
    return str;
}

/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
 * Python's regular ints.
 * Return value:  a new PyString*, or NULL if error.
 *  .  *pbuf is set to point into it,
 *     *plen set to the # of chars following that.
 *     Caller must decref it when done using pbuf.
 *     The string starting at *pbuf is of the form
 *         "-"? ("0x" | "0X")? digit+
 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
 *         set in flags.  The case of hex digits will be correct,
 *     There will be at least prec digits, zero-filled on the left if
 *         necessary to get that many.
 * val          object to be converted
 * flags        bitmask of format flags; only F_ALT is looked at
 * prec         minimum number of digits; 0-fill on left if needed
 * type         a character in [duoxX]; u acts the same as d
 *
 * CAUTION:  o, x and X conversions on regular ints can never
 * produce a '-' sign, but can for Python's unbounded ints.
3985 */ 3986 PyObject* 3987 _PyString_FormatLong(PyObject *val, int flags, int prec, int type, 3988 char **pbuf, int *plen) 3989 { 3990 PyObject *result = NULL; 3991 char *buf; 3992 Py_ssize_t i; 3993 int sign; /* 1 if '-', else 0 */ 3994 int len; /* number of characters */ 3995 Py_ssize_t llen; 3996 int numdigits; /* len == numnondigits + numdigits */ 3997 int numnondigits = 0; 3998 3999 switch (type) { 4000 case 'd': 4001 case 'u': 4002 result = Py_TYPE(val)->tp_str(val); 4003 break; 4004 case 'o': 4005 result = Py_TYPE(val)->tp_as_number->nb_oct(val); 4006 break; 4007 case 'x': 4008 case 'X': 4009 numnondigits = 2; 4010 result = Py_TYPE(val)->tp_as_number->nb_hex(val); 4011 break; 4012 default: 4013 assert(!"'type' not in [duoxX]"); 4014 } 4015 if (!result) 4016 return NULL; 4017 4018 buf = PyString_AsString(result); 4019 if (!buf) { 4020 Py_DECREF(result); 4021 return NULL; 4022 } 4023 4024 /* To modify the string in-place, there can only be one reference. */ 4025 if (Py_REFCNT(result) != 1) { 4026 PyErr_BadInternalCall(); 4027 return NULL; 4028 } 4029 llen = PyString_Size(result); 4030 if (llen > INT_MAX) { 4031 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); 4032 return NULL; 4033 } 4034 len = (int)llen; 4035 if (buf[len-1] == 'L') { 4036 --len; 4037 buf[len] = '\0'; 4038 } 4039 sign = buf[0] == '-'; 4040 numnondigits += sign; 4041 numdigits = len - numnondigits; 4042 assert(numdigits > 0); 4043 4044 /* Get rid of base marker unless F_ALT */ 4045 if ((flags & F_ALT) == 0) { 4046 /* Need to skip 0x, 0X or 0. */ 4047 int skipped = 0; 4048 switch (type) { 4049 case 'o': 4050 assert(buf[sign] == '0'); 4051 /* If 0 is only digit, leave it alone. 
*/ 4052 if (numdigits > 1) { 4053 skipped = 1; 4054 --numdigits; 4055 } 4056 break; 4057 case 'x': 4058 case 'X': 4059 assert(buf[sign] == '0'); 4060 assert(buf[sign + 1] == 'x'); 4061 skipped = 2; 4062 numnondigits -= 2; 4063 break; 4064 } 4065 if (skipped) { 4066 buf += skipped; 4067 len -= skipped; 4068 if (sign) 4069 buf[0] = '-'; 4070 } 4071 assert(len == numnondigits + numdigits); 4072 assert(numdigits > 0); 4073 } 4074 4075 /* Fill with leading zeroes to meet minimum width. */ 4076 if (prec > numdigits) { 4077 PyObject *r1 = PyString_FromStringAndSize(NULL, 4078 numnondigits + prec); 4079 char *b1; 4080 if (!r1) { 4081 Py_DECREF(result); 4082 return NULL; 4083 } 4084 b1 = PyString_AS_STRING(r1); 4085 for (i = 0; i < numnondigits; ++i) 4086 *b1++ = *buf++; 4087 for (i = 0; i < prec - numdigits; i++) 4088 *b1++ = '0'; 4089 for (i = 0; i < numdigits; i++) 4090 *b1++ = *buf++; 4091 *b1 = '\0'; 4092 Py_DECREF(result); 4093 result = r1; 4094 buf = PyString_AS_STRING(result); 4095 len = numnondigits + prec; 4096 } 4097 4098 /* Fix up case for hex conversions. */ 4099 if (type == 'X') { 4100 /* Need to convert all lower case letters to upper case. 4101 and need to convert 0x to 0X (and -0x to -0X). */ 4102 for (i = 0; i < len; i++) 4103 if (buf[i] >= 'a' && buf[i] <= 'x') 4104 buf[i] -= 'a'-'A'; 4105 } 4106 *pbuf = buf; 4107 *plen = len; 4108 return result; 4109 } 4110 4111 Py_LOCAL_INLINE(int) 4112 formatint(char *buf, size_t buflen, int flags, 4113 int prec, int type, PyObject *v) 4114 { 4115 /* fmt = '%#.' + `prec` + 'l' + `type` 4116 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) 4117 + 1 + 1 = 24 */ 4118 char fmt[64]; /* plenty big enough! 
*/ 4119 char *sign; 4120 long x; 4121 4122 x = PyInt_AsLong(v); 4123 if (x == -1 && PyErr_Occurred()) { 4124 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s", 4125 Py_TYPE(v)->tp_name); 4126 return -1; 4127 } 4128 if (x < 0 && type == 'u') { 4129 type = 'd'; 4130 } 4131 if (x < 0 && (type == 'x' || type == 'X' || type == 'o')) 4132 sign = "-"; 4133 else 4134 sign = ""; 4135 if (prec < 0) 4136 prec = 1; 4137 4138 if ((flags & F_ALT) && 4139 (type == 'x' || type == 'X')) { 4140 /* When converting under %#x or %#X, there are a number 4141 * of issues that cause pain: 4142 * - when 0 is being converted, the C standard leaves off 4143 * the '0x' or '0X', which is inconsistent with other 4144 * %#x/%#X conversions and inconsistent with Python's 4145 * hex() function 4146 * - there are platforms that violate the standard and 4147 * convert 0 with the '0x' or '0X' 4148 * (Metrowerks, Compaq Tru64) 4149 * - there are platforms that give '0x' when converting 4150 * under %#X, but convert 0 in accordance with the 4151 * standard (OS/2 EMX) 4152 * 4153 * We can achieve the desired consistency by inserting our 4154 * own '0x' or '0X' prefix, and substituting %x/%X in place 4155 * of %#x/%#X. 4156 * 4157 * Note that this is the same approach as used in 4158 * formatint() in unicodeobject.c 4159 */ 4160 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", 4161 sign, type, prec, type); 4162 } 4163 else { 4164 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", 4165 sign, (flags&F_ALT) ? 
"#" : "", 4166 prec, type); 4167 } 4168 4169 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal)) 4170 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11 4171 */ 4172 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) { 4173 PyErr_SetString(PyExc_OverflowError, 4174 "formatted integer is too long (precision too large?)"); 4175 return -1; 4176 } 4177 if (sign[0]) 4178 PyOS_snprintf(buf, buflen, fmt, -x); 4179 else 4180 PyOS_snprintf(buf, buflen, fmt, x); 4181 return (int)strlen(buf); 4182 } 4183 4184 Py_LOCAL_INLINE(int) 4185 formatchar(char *buf, size_t buflen, PyObject *v) 4186 { 4187 /* presume that the buffer is at least 2 characters long */ 4188 if (PyString_Check(v)) { 4189 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) 4190 return -1; 4191 } 4192 else { 4193 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0])) 4194 return -1; 4195 } 4196 buf[1] = '\0'; 4197 return 1; 4198 } 4199 4200 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) 4201 4202 FORMATBUFLEN is the length of the buffer in which the ints & 4203 chars are formatted. XXX This is a magic number. Each formatting 4204 routine does bounds checking to ensure no overflow, but a better 4205 solution may be to malloc a buffer of appropriate size for each 4206 format. For now, the current solution is sufficient. 
4207 */ 4208 #define FORMATBUFLEN (size_t)120 4209 4210 PyObject * 4211 PyString_Format(PyObject *format, PyObject *args) 4212 { 4213 char *fmt, *res; 4214 Py_ssize_t arglen, argidx; 4215 Py_ssize_t reslen, rescnt, fmtcnt; 4216 int args_owned = 0; 4217 PyObject *result, *orig_args; 4218 #ifdef Py_USING_UNICODE 4219 PyObject *v, *w; 4220 #endif 4221 PyObject *dict = NULL; 4222 if (format == NULL || !PyString_Check(format) || args == NULL) { 4223 PyErr_BadInternalCall(); 4224 return NULL; 4225 } 4226 orig_args = args; 4227 fmt = PyString_AS_STRING(format); 4228 fmtcnt = PyString_GET_SIZE(format); 4229 reslen = rescnt = fmtcnt + 100; 4230 result = PyString_FromStringAndSize((char *)NULL, reslen); 4231 if (result == NULL) 4232 return NULL; 4233 res = PyString_AsString(result); 4234 if (PyTuple_Check(args)) { 4235 arglen = PyTuple_GET_SIZE(args); 4236 argidx = 0; 4237 } 4238 else { 4239 arglen = -1; 4240 argidx = -2; 4241 } 4242 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) && 4243 !PyObject_TypeCheck(args, &PyBaseString_Type)) 4244 dict = args; 4245 while (--fmtcnt >= 0) { 4246 if (*fmt != '%') { 4247 if (--rescnt < 0) { 4248 rescnt = fmtcnt + 100; 4249 reslen += rescnt; 4250 if (_PyString_Resize(&result, reslen)) 4251 return NULL; 4252 res = PyString_AS_STRING(result) 4253 + reslen - rescnt; 4254 --rescnt; 4255 } 4256 *res++ = *fmt++; 4257 } 4258 else { 4259 /* Got a format specifier */ 4260 int flags = 0; 4261 Py_ssize_t width = -1; 4262 int prec = -1; 4263 int c = '\0'; 4264 int fill; 4265 int isnumok; 4266 PyObject *v = NULL; 4267 PyObject *temp = NULL; 4268 char *pbuf = NULL; 4269 int sign; 4270 Py_ssize_t len; 4271 char formatbuf[FORMATBUFLEN]; 4272 /* For format{int,char}() */ 4273 #ifdef Py_USING_UNICODE 4274 char *fmt_start = fmt; 4275 Py_ssize_t argidx_start = argidx; 4276 #endif 4277 4278 fmt++; 4279 if (*fmt == '(') { 4280 char *keystart; 4281 Py_ssize_t keylen; 4282 PyObject *key; 4283 int pcount = 1; 4284 4285 if (dict == NULL) { 4286 
PyErr_SetString(PyExc_TypeError, 4287 "format requires a mapping"); 4288 goto error; 4289 } 4290 ++fmt; 4291 --fmtcnt; 4292 keystart = fmt; 4293 /* Skip over balanced parentheses */ 4294 while (pcount > 0 && --fmtcnt >= 0) { 4295 if (*fmt == ')') 4296 --pcount; 4297 else if (*fmt == '(') 4298 ++pcount; 4299 fmt++; 4300 } 4301 keylen = fmt - keystart - 1; 4302 if (fmtcnt < 0 || pcount > 0) { 4303 PyErr_SetString(PyExc_ValueError, 4304 "incomplete format key"); 4305 goto error; 4306 } 4307 key = PyString_FromStringAndSize(keystart, 4308 keylen); 4309 if (key == NULL) 4310 goto error; 4311 if (args_owned) { 4312 Py_DECREF(args); 4313 args_owned = 0; 4314 } 4315 args = PyObject_GetItem(dict, key); 4316 Py_DECREF(key); 4317 if (args == NULL) { 4318 goto error; 4319 } 4320 args_owned = 1; 4321 arglen = -1; 4322 argidx = -2; 4323 } 4324 while (--fmtcnt >= 0) { 4325 switch (c = *fmt++) { 4326 case '-': flags |= F_LJUST; continue; 4327 case '+': flags |= F_SIGN; continue; 4328 case ' ': flags |= F_BLANK; continue; 4329 case '#': flags |= F_ALT; continue; 4330 case '0': flags |= F_ZERO; continue; 4331 } 4332 break; 4333 } 4334 if (c == '*') { 4335 v = getnextarg(args, arglen, &argidx); 4336 if (v == NULL) 4337 goto error; 4338 if (!PyInt_Check(v)) { 4339 PyErr_SetString(PyExc_TypeError, 4340 "* wants int"); 4341 goto error; 4342 } 4343 width = PyInt_AsLong(v); 4344 if (width < 0) { 4345 flags |= F_LJUST; 4346 width = -width; 4347 } 4348 if (--fmtcnt >= 0) 4349 c = *fmt++; 4350 } 4351 else if (c >= 0 && isdigit(c)) { 4352 width = c - '0'; 4353 while (--fmtcnt >= 0) { 4354 c = Py_CHARMASK(*fmt++); 4355 if (!isdigit(c)) 4356 break; 4357 if ((width*10) / 10 != width) { 4358 PyErr_SetString( 4359 PyExc_ValueError, 4360 "width too big"); 4361 goto error; 4362 } 4363 width = width*10 + (c - '0'); 4364 } 4365 } 4366 if (c == '.') { 4367 prec = 0; 4368 if (--fmtcnt >= 0) 4369 c = *fmt++; 4370 if (c == '*') { 4371 v = getnextarg(args, arglen, &argidx); 4372 if (v == NULL) 4373 goto 
error; 4374 if (!PyInt_Check(v)) { 4375 PyErr_SetString( 4376 PyExc_TypeError, 4377 "* wants int"); 4378 goto error; 4379 } 4380 prec = PyInt_AsLong(v); 4381 if (prec < 0) 4382 prec = 0; 4383 if (--fmtcnt >= 0) 4384 c = *fmt++; 4385 } 4386 else if (c >= 0 && isdigit(c)) { 4387 prec = c - '0'; 4388 while (--fmtcnt >= 0) { 4389 c = Py_CHARMASK(*fmt++); 4390 if (!isdigit(c)) 4391 break; 4392 if ((prec*10) / 10 != prec) { 4393 PyErr_SetString( 4394 PyExc_ValueError, 4395 "prec too big"); 4396 goto error; 4397 } 4398 prec = prec*10 + (c - '0'); 4399 } 4400 } 4401 } /* prec */ 4402 if (fmtcnt >= 0) { 4403 if (c == 'h' || c == 'l' || c == 'L') { 4404 if (--fmtcnt >= 0) 4405 c = *fmt++; 4406 } 4407 } 4408 if (fmtcnt < 0) { 4409 PyErr_SetString(PyExc_ValueError, 4410 "incomplete format"); 4411 goto error; 4412 } 4413 if (c != '%') { 4414 v = getnextarg(args, arglen, &argidx); 4415 if (v == NULL) 4416 goto error; 4417 } 4418 sign = 0; 4419 fill = ' '; 4420 switch (c) { 4421 case '%': 4422 pbuf = "%"; 4423 len = 1; 4424 break; 4425 case 's': 4426 #ifdef Py_USING_UNICODE 4427 if (PyUnicode_Check(v)) { 4428 fmt = fmt_start; 4429 argidx = argidx_start; 4430 goto unicode; 4431 } 4432 #endif 4433 temp = _PyObject_Str(v); 4434 #ifdef Py_USING_UNICODE 4435 if (temp != NULL && PyUnicode_Check(temp)) { 4436 Py_DECREF(temp); 4437 fmt = fmt_start; 4438 argidx = argidx_start; 4439 goto unicode; 4440 } 4441 #endif 4442 /* Fall through */ 4443 case 'r': 4444 if (c == 'r') 4445 temp = PyObject_Repr(v); 4446 if (temp == NULL) 4447 goto error; 4448 if (!PyString_Check(temp)) { 4449 PyErr_SetString(PyExc_TypeError, 4450 "%s argument has non-string str()"); 4451 Py_DECREF(temp); 4452 goto error; 4453 } 4454 pbuf = PyString_AS_STRING(temp); 4455 len = PyString_GET_SIZE(temp); 4456 if (prec >= 0 && len > prec) 4457 len = prec; 4458 break; 4459 case 'i': 4460 case 'd': 4461 case 'u': 4462 case 'o': 4463 case 'x': 4464 case 'X': 4465 if (c == 'i') 4466 c = 'd'; 4467 isnumok = 0; 4468 if 
(PyNumber_Check(v)) { 4469 PyObject *iobj=NULL; 4470 4471 if (PyInt_Check(v) || (PyLong_Check(v))) { 4472 iobj = v; 4473 Py_INCREF(iobj); 4474 } 4475 else { 4476 iobj = PyNumber_Int(v); 4477 if (iobj==NULL) iobj = PyNumber_Long(v); 4478 } 4479 if (iobj!=NULL) { 4480 if (PyInt_Check(iobj)) { 4481 isnumok = 1; 4482 pbuf = formatbuf; 4483 len = formatint(pbuf, 4484 sizeof(formatbuf), 4485 flags, prec, c, iobj); 4486 Py_DECREF(iobj); 4487 if (len < 0) 4488 goto error; 4489 sign = 1; 4490 } 4491 else if (PyLong_Check(iobj)) { 4492 int ilen; 4493 4494 isnumok = 1; 4495 temp = _PyString_FormatLong(iobj, flags, 4496 prec, c, &pbuf, &ilen); 4497 Py_DECREF(iobj); 4498 len = ilen; 4499 if (!temp) 4500 goto error; 4501 sign = 1; 4502 } 4503 else { 4504 Py_DECREF(iobj); 4505 } 4506 } 4507 } 4508 if (!isnumok) { 4509 PyErr_Format(PyExc_TypeError, 4510 "%%%c format: a number is required, " 4511 "not %.200s", c, Py_TYPE(v)->tp_name); 4512 goto error; 4513 } 4514 if (flags & F_ZERO) 4515 fill = '0'; 4516 break; 4517 case 'e': 4518 case 'E': 4519 case 'f': 4520 case 'F': 4521 case 'g': 4522 case 'G': 4523 temp = formatfloat(v, flags, prec, c); 4524 if (temp == NULL) 4525 goto error; 4526 pbuf = PyString_AS_STRING(temp); 4527 len = PyString_GET_SIZE(temp); 4528 sign = 1; 4529 if (flags & F_ZERO) 4530 fill = '0'; 4531 break; 4532 case 'c': 4533 #ifdef Py_USING_UNICODE 4534 if (PyUnicode_Check(v)) { 4535 fmt = fmt_start; 4536 argidx = argidx_start; 4537 goto unicode; 4538 } 4539 #endif 4540 pbuf = formatbuf; 4541 len = formatchar(pbuf, sizeof(formatbuf), v); 4542 if (len < 0) 4543 goto error; 4544 break; 4545 default: 4546 PyErr_Format(PyExc_ValueError, 4547 "unsupported format character '%c' (0x%x) " 4548 "at index %zd", 4549 c, c, 4550 (Py_ssize_t)(fmt - 1 - 4551 PyString_AsString(format))); 4552 goto error; 4553 } 4554 if (sign) { 4555 if (*pbuf == '-' || *pbuf == '+') { 4556 sign = *pbuf++; 4557 len--; 4558 } 4559 else if (flags & F_SIGN) 4560 sign = '+'; 4561 else if (flags & 
F_BLANK) 4562 sign = ' '; 4563 else 4564 sign = 0; 4565 } 4566 if (width < len) 4567 width = len; 4568 if (rescnt - (sign != 0) < width) { 4569 reslen -= rescnt; 4570 rescnt = width + fmtcnt + 100; 4571 reslen += rescnt; 4572 if (reslen < 0) { 4573 Py_DECREF(result); 4574 Py_XDECREF(temp); 4575 return PyErr_NoMemory(); 4576 } 4577 if (_PyString_Resize(&result, reslen)) { 4578 Py_XDECREF(temp); 4579 return NULL; 4580 } 4581 res = PyString_AS_STRING(result) 4582 + reslen - rescnt; 4583 } 4584 if (sign) { 4585 if (fill != ' ') 4586 *res++ = sign; 4587 rescnt--; 4588 if (width > len) 4589 width--; 4590 } 4591 if ((flags & F_ALT) && (c == 'x' || c == 'X')) { 4592 assert(pbuf[0] == '0'); 4593 assert(pbuf[1] == c); 4594 if (fill != ' ') { 4595 *res++ = *pbuf++; 4596 *res++ = *pbuf++; 4597 } 4598 rescnt -= 2; 4599 width -= 2; 4600 if (width < 0) 4601 width = 0; 4602 len -= 2; 4603 } 4604 if (width > len && !(flags & F_LJUST)) { 4605 do { 4606 --rescnt; 4607 *res++ = fill; 4608 } while (--width > len); 4609 } 4610 if (fill == ' ') { 4611 if (sign) 4612 *res++ = sign; 4613 if ((flags & F_ALT) && 4614 (c == 'x' || c == 'X')) { 4615 assert(pbuf[0] == '0'); 4616 assert(pbuf[1] == c); 4617 *res++ = *pbuf++; 4618 *res++ = *pbuf++; 4619 } 4620 } 4621 Py_MEMCPY(res, pbuf, len); 4622 res += len; 4623 rescnt -= len; 4624 while (--width >= len) { 4625 --rescnt; 4626 *res++ = ' '; 4627 } 4628 if (dict && (argidx < arglen) && c != '%') { 4629 PyErr_SetString(PyExc_TypeError, 4630 "not all arguments converted during string formatting"); 4631 Py_XDECREF(temp); 4632 goto error; 4633 } 4634 Py_XDECREF(temp); 4635 } /* '%' */ 4636 } /* until end */ 4637 if (argidx < arglen && !dict) { 4638 PyErr_SetString(PyExc_TypeError, 4639 "not all arguments converted during string formatting"); 4640 goto error; 4641 } 4642 if (args_owned) { 4643 Py_DECREF(args); 4644 } 4645 if (_PyString_Resize(&result, reslen - rescnt)) 4646 return NULL; 4647 return result; 4648 4649 #ifdef Py_USING_UNICODE 4650 
unicode: 4651 if (args_owned) { 4652 Py_DECREF(args); 4653 args_owned = 0; 4654 } 4655 /* Fiddle args right (remove the first argidx arguments) */ 4656 if (PyTuple_Check(orig_args) && argidx > 0) { 4657 PyObject *v; 4658 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx; 4659 v = PyTuple_New(n); 4660 if (v == NULL) 4661 goto error; 4662 while (--n >= 0) { 4663 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx); 4664 Py_INCREF(w); 4665 PyTuple_SET_ITEM(v, n, w); 4666 } 4667 args = v; 4668 } else { 4669 Py_INCREF(orig_args); 4670 args = orig_args; 4671 } 4672 args_owned = 1; 4673 /* Take what we have of the result and let the Unicode formatting 4674 function format the rest of the input. */ 4675 rescnt = res - PyString_AS_STRING(result); 4676 if (_PyString_Resize(&result, rescnt)) 4677 goto error; 4678 fmtcnt = PyString_GET_SIZE(format) - \ 4679 (fmt - PyString_AS_STRING(format)); 4680 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); 4681 if (format == NULL) 4682 goto error; 4683 v = PyUnicode_Format(format, args); 4684 Py_DECREF(format); 4685 if (v == NULL) 4686 goto error; 4687 /* Paste what we have (result) to what the Unicode formatting 4688 function returned (v) and return the result (or error) */ 4689 w = PyUnicode_Concat(result, v); 4690 Py_DECREF(result); 4691 Py_DECREF(v); 4692 Py_DECREF(args); 4693 return w; 4694 #endif /* Py_USING_UNICODE */ 4695 4696 error: 4697 Py_DECREF(result); 4698 if (args_owned) { 4699 Py_DECREF(args); 4700 } 4701 return NULL; 4702 } 4703 4704 void 4705 PyString_InternInPlace(PyObject **p) 4706 { 4707 register PyStringObject *s = (PyStringObject *)(*p); 4708 PyObject *t; 4709 if (s == NULL || !PyString_Check(s)) 4710 Py_FatalError("PyString_InternInPlace: strings only please!"); 4711 /* If it's a string subclass, we don't really know what putting 4712 it in the interned dict might do. 
*/ 4713 if (!PyString_CheckExact(s)) 4714 return; 4715 if (PyString_CHECK_INTERNED(s)) 4716 return; 4717 if (interned == NULL) { 4718 interned = PyDict_New(); 4719 if (interned == NULL) { 4720 PyErr_Clear(); /* Don't leave an exception */ 4721 return; 4722 } 4723 } 4724 t = PyDict_GetItem(interned, (PyObject *)s); 4725 if (t) { 4726 Py_INCREF(t); 4727 Py_DECREF(*p); 4728 *p = t; 4729 return; 4730 } 4731 4732 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { 4733 PyErr_Clear(); 4734 return; 4735 } 4736 /* The two references in interned are not counted by refcnt. 4737 The string deallocator will take care of this */ 4738 Py_REFCNT(s) -= 2; 4739 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; 4740 } 4741 4742 void 4743 PyString_InternImmortal(PyObject **p) 4744 { 4745 PyString_InternInPlace(p); 4746 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { 4747 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; 4748 Py_INCREF(*p); 4749 } 4750 } 4751 4752 4753 PyObject * 4754 PyString_InternFromString(const char *cp) 4755 { 4756 PyObject *s = PyString_FromString(cp); 4757 if (s == NULL) 4758 return NULL; 4759 PyString_InternInPlace(&s); 4760 return s; 4761 } 4762 4763 void 4764 PyString_Fini(void) 4765 { 4766 int i; 4767 for (i = 0; i < UCHAR_MAX + 1; i++) { 4768 Py_XDECREF(characters[i]); 4769 characters[i] = NULL; 4770 } 4771 Py_XDECREF(nullstring); 4772 nullstring = NULL; 4773 } 4774 4775 void _Py_ReleaseInternedStrings(void) 4776 { 4777 PyObject *keys; 4778 PyStringObject *s; 4779 Py_ssize_t i, n; 4780 Py_ssize_t immortal_size = 0, mortal_size = 0; 4781 4782 if (interned == NULL || !PyDict_Check(interned)) 4783 return; 4784 keys = PyDict_Keys(interned); 4785 if (keys == NULL || !PyList_Check(keys)) { 4786 PyErr_Clear(); 4787 return; 4788 } 4789 4790 /* Since _Py_ReleaseInternedStrings() is intended to help a leak 4791 detector, interned strings are not forcibly deallocated; rather, we 4792 give them their stolen references back, and 
then clear and DECREF 4793 the interned dict. */ 4794 4795 n = PyList_GET_SIZE(keys); 4796 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", 4797 n); 4798 for (i = 0; i < n; i++) { 4799 s = (PyStringObject *) PyList_GET_ITEM(keys, i); 4800 switch (s->ob_sstate) { 4801 case SSTATE_NOT_INTERNED: 4802 /* XXX Shouldn't happen */ 4803 break; 4804 case SSTATE_INTERNED_IMMORTAL: 4805 Py_REFCNT(s) += 1; 4806 immortal_size += Py_SIZE(s); 4807 break; 4808 case SSTATE_INTERNED_MORTAL: 4809 Py_REFCNT(s) += 2; 4810 mortal_size += Py_SIZE(s); 4811 break; 4812 default: 4813 Py_FatalError("Inconsistent interned string state."); 4814 } 4815 s->ob_sstate = SSTATE_NOT_INTERNED; 4816 } 4817 fprintf(stderr, "total size of all interned strings: " 4818 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " 4819 "mortal/immortal\n", mortal_size, immortal_size); 4820 Py_DECREF(keys); 4821 PyDict_Clear(interned); 4822 Py_DECREF(interned); 4823 interned = NULL; 4824 } 4825