1 /* 2 string_format.h -- implementation of string.format(). 3 4 It uses the Objects/stringlib conventions, so that it can be 5 compiled for both unicode and string objects. 6 */ 7 8 9 /* Defines for Python 2.6 compatibility */ 10 #if PY_VERSION_HEX < 0x03000000 11 #define PyLong_FromSsize_t _PyLong_FromSsize_t 12 #endif 13 14 /* Defines for more efficiently reallocating the string buffer */ 15 #define INITIAL_SIZE_INCREMENT 100 16 #define SIZE_MULTIPLIER 2 17 #define MAX_SIZE_INCREMENT 3200 18 19 20 /************************************************************************/ 21 /*********** Global data structures and forward declarations *********/ 22 /************************************************************************/ 23 24 /* 25 A SubString consists of the characters between two string or 26 unicode pointers. 27 */ 28 typedef struct { 29 STRINGLIB_CHAR *ptr; 30 STRINGLIB_CHAR *end; 31 } SubString; 32 33 34 typedef enum { 35 ANS_INIT, 36 ANS_AUTO, 37 ANS_MANUAL 38 } AutoNumberState; /* Keep track if we're auto-numbering fields */ 39 40 /* Keeps track of our auto-numbering state, and which number field we're on */ 41 typedef struct { 42 AutoNumberState an_state; 43 int an_field_number; 44 } AutoNumber; 45 46 47 /* forward declaration for recursion */ 48 static PyObject * 49 build_string(SubString *input, PyObject *args, PyObject *kwargs, 50 int recursion_depth, AutoNumber *auto_number); 51 52 53 54 /************************************************************************/ 55 /************************** Utility functions ************************/ 56 /************************************************************************/ 57 58 static void 59 AutoNumber_Init(AutoNumber *auto_number) 60 { 61 auto_number->an_state = ANS_INIT; 62 auto_number->an_field_number = 0; 63 } 64 65 /* fill in a SubString from a pointer and length */ 66 Py_LOCAL_INLINE(void) 67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) 68 { 69 str->ptr = p; 70 if (p == NULL) 71 str->end = NULL; 72 else 73 str->end = str->ptr + len; 74 } 75 76 /* return a new string. if str->ptr is NULL, return None */ 77 Py_LOCAL_INLINE(PyObject *) 78 SubString_new_object(SubString *str) 79 { 80 if (str->ptr == NULL) { 81 Py_INCREF(Py_None); 82 return Py_None; 83 } 84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr); 85 } 86 87 /* return a new string. if str->ptr is NULL, return None */ 88 Py_LOCAL_INLINE(PyObject *) 89 SubString_new_object_or_empty(SubString *str) 90 { 91 if (str->ptr == NULL) { 92 return STRINGLIB_NEW(NULL, 0); 93 } 94 return STRINGLIB_NEW(str->ptr, str->end - str->ptr); 95 } 96 97 /* Return 1 if an error has been detected switching between automatic 98 field numbering and manual field specification, else return 0. Set 99 ValueError on error. */ 100 static int 101 autonumber_state_error(AutoNumberState state, int field_name_is_empty) 102 { 103 if (state == ANS_MANUAL) { 104 if (field_name_is_empty) { 105 PyErr_SetString(PyExc_ValueError, "cannot switch from " 106 "manual field specification to " 107 "automatic field numbering"); 108 return 1; 109 } 110 } 111 else { 112 if (!field_name_is_empty) { 113 PyErr_SetString(PyExc_ValueError, "cannot switch from " 114 "automatic field numbering to " 115 "manual field specification"); 116 return 1; 117 } 118 } 119 return 0; 120 } 121 122 123 /************************************************************************/ 124 /*********** Output string management functions ****************/ 125 /************************************************************************/ 126 127 typedef struct { 128 STRINGLIB_CHAR *ptr; 129 STRINGLIB_CHAR *end; 130 PyObject *obj; 131 Py_ssize_t size_increment; 132 } OutputString; 133 134 /* initialize an OutputString object, reserving size characters */ 135 static int 136 output_initialize(OutputString *output, Py_ssize_t size) 137 { 138 output->obj = STRINGLIB_NEW(NULL, size); 139 if (output->obj == NULL) 140 return 0; 141 142 output->ptr = STRINGLIB_STR(output->obj); 143 output->end = STRINGLIB_LEN(output->obj) + output->ptr; 144 output->size_increment = INITIAL_SIZE_INCREMENT; 145 146 return 1; 147 } 148 149 /* 150 output_extend reallocates the output string buffer. 151 It returns a status: 0 for a failed reallocation, 152 1 for success. 153 */ 154 155 static int 156 output_extend(OutputString *output, Py_ssize_t count) 157 { 158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj); 159 Py_ssize_t curlen = output->ptr - startptr; 160 Py_ssize_t maxlen = curlen + count + output->size_increment; 161 162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0) 163 return 0; 164 startptr = STRINGLIB_STR(output->obj); 165 output->ptr = startptr + curlen; 166 output->end = startptr + maxlen; 167 if (output->size_increment < MAX_SIZE_INCREMENT) 168 output->size_increment *= SIZE_MULTIPLIER; 169 return 1; 170 } 171 172 /* 173 output_data dumps characters into our output string 174 buffer. 175 176 In some cases, it has to reallocate the string. 177 178 It returns a status: 0 for a failed reallocation, 179 1 for success. 180 */ 181 static int 182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count) 183 { 184 if ((count > output->end - output->ptr) && !output_extend(output, count)) 185 return 0; 186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR)); 187 output->ptr += count; 188 return 1; 189 } 190 191 /************************************************************************/ 192 /*********** Format string parsing -- integers and identifiers *********/ 193 /************************************************************************/ 194 195 static Py_ssize_t 196 get_integer(const SubString *str) 197 { 198 Py_ssize_t accumulator = 0; 199 Py_ssize_t digitval; 200 STRINGLIB_CHAR *p; 201 202 /* empty string is an error */ 203 if (str->ptr >= str->end) 204 return -1; 205 206 for (p = str->ptr; p < str->end; p++) { 207 digitval = STRINGLIB_TODECIMAL(*p); 208 if (digitval < 0) 209 return -1; 210 /* 211 Detect possible overflow before it happens: 212 213 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if 214 accumulator > (PY_SSIZE_T_MAX - digitval) / 10. 215 */ 216 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { 217 PyErr_Format(PyExc_ValueError, 218 "Too many decimal digits in format string"); 219 return -1; 220 } 221 accumulator = accumulator * 10 + digitval; 222 } 223 return accumulator; 224 } 225 226 /************************************************************************/ 227 /******** Functions to get field objects and specification strings ******/ 228 /************************************************************************/ 229 230 /* do the equivalent of obj.name */ 231 static PyObject * 232 getattr(PyObject *obj, SubString *name) 233 { 234 PyObject *newobj; 235 PyObject *str = SubString_new_object(name); 236 if (str == NULL) 237 return NULL; 238 newobj = PyObject_GetAttr(obj, str); 239 Py_DECREF(str); 240 return newobj; 241 } 242 243 /* do the equivalent of obj[idx], where obj is a sequence */ 244 static PyObject * 245 getitem_sequence(PyObject *obj, Py_ssize_t idx) 246 { 247 return PySequence_GetItem(obj, idx); 248 } 249 250 /* do the equivalent of obj[idx], where obj is not a sequence */ 251 static PyObject * 252 getitem_idx(PyObject *obj, Py_ssize_t idx) 253 { 254 PyObject *newobj; 255 PyObject *idx_obj = PyLong_FromSsize_t(idx); 256 if (idx_obj == NULL) 257 return NULL; 258 newobj = PyObject_GetItem(obj, idx_obj); 259 Py_DECREF(idx_obj); 260 return newobj; 261 } 262 263 /* do the equivalent of obj[name] */ 264 static PyObject * 265 getitem_str(PyObject *obj, SubString *name) 266 { 267 PyObject *newobj; 268 PyObject *str = SubString_new_object(name); 269 if (str == NULL) 270 return NULL; 271 newobj = PyObject_GetItem(obj, str); 272 Py_DECREF(str); 273 return newobj; 274 } 275 276 typedef struct { 277 /* the entire string we're parsing. we assume that someone else 278 is managing its lifetime, and that it will exist for the 279 lifetime of the iterator. can be empty */ 280 SubString str; 281 282 /* pointer to where we are inside field_name */ 283 STRINGLIB_CHAR *ptr; 284 } FieldNameIterator; 285 286 287 static int 288 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr, 289 Py_ssize_t len) 290 { 291 SubString_init(&self->str, ptr, len); 292 self->ptr = self->str.ptr; 293 return 1; 294 } 295 296 static int 297 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name) 298 { 299 STRINGLIB_CHAR c; 300 301 name->ptr = self->ptr; 302 303 /* return everything until '.' or '[' */ 304 while (self->ptr < self->str.end) { 305 switch (c = *self->ptr++) { 306 case '[': 307 case '.': 308 /* backup so that we this character will be seen next time */ 309 self->ptr--; 310 break; 311 default: 312 continue; 313 } 314 break; 315 } 316 /* end of string is okay */ 317 name->end = self->ptr; 318 return 1; 319 } 320 321 static int 322 _FieldNameIterator_item(FieldNameIterator *self, SubString *name) 323 { 324 int bracket_seen = 0; 325 STRINGLIB_CHAR c; 326 327 name->ptr = self->ptr; 328 329 /* return everything until ']' */ 330 while (self->ptr < self->str.end) { 331 switch (c = *self->ptr++) { 332 case ']': 333 bracket_seen = 1; 334 break; 335 default: 336 continue; 337 } 338 break; 339 } 340 /* make sure we ended with a ']' */ 341 if (!bracket_seen) { 342 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); 343 return 0; 344 } 345 346 /* end of string is okay */ 347 /* don't include the ']' */ 348 name->end = self->ptr-1; 349 return 1; 350 } 351 352 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ 353 static int 354 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, 355 Py_ssize_t *name_idx, SubString *name) 356 { 357 /* check at end of input */ 358 if (self->ptr >= self->str.end) 359 return 1; 360 361 switch (*self->ptr++) { 362 case '.': 363 *is_attribute = 1; 364 if (_FieldNameIterator_attr(self, name) == 0) 365 return 0; 366 *name_idx = -1; 367 break; 368 case '[': 369 *is_attribute = 0; 370 if (_FieldNameIterator_item(self, name) == 0) 371 return 0; 372 *name_idx = get_integer(name); 373 if (*name_idx == -1 && PyErr_Occurred()) 374 return 0; 375 break; 376 default: 377 /* Invalid character follows ']' */ 378 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " 379 "follow ']' in format field specifier"); 380 return 0; 381 } 382 383 /* empty string is an error */ 384 if (name->ptr == name->end) { 385 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); 386 return 0; 387 } 388 389 return 2; 390 } 391 392 393 /* input: field_name 394 output: 'first' points to the part before the first '[' or '.' 395 'first_idx' is -1 if 'first' is not an integer, otherwise 396 it's the value of first converted to an integer 397 'rest' is an iterator to return the rest 398 */ 399 static int 400 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, 401 Py_ssize_t *first_idx, FieldNameIterator *rest, 402 AutoNumber *auto_number) 403 { 404 STRINGLIB_CHAR c; 405 STRINGLIB_CHAR *p = ptr; 406 STRINGLIB_CHAR *end = ptr + len; 407 int field_name_is_empty; 408 int using_numeric_index; 409 410 /* find the part up until the first '.' or '[' */ 411 while (p < end) { 412 switch (c = *p++) { 413 case '[': 414 case '.': 415 /* backup so that we this character is available to the 416 "rest" iterator */ 417 p--; 418 break; 419 default: 420 continue; 421 } 422 break; 423 } 424 425 /* set up the return values */ 426 SubString_init(first, ptr, p - ptr); 427 FieldNameIterator_init(rest, p, end - p); 428 429 /* see if "first" is an integer, in which case it's used as an index */ 430 *first_idx = get_integer(first); 431 if (*first_idx == -1 && PyErr_Occurred()) 432 return 0; 433 434 field_name_is_empty = first->ptr >= first->end; 435 436 /* If the field name is omitted or if we have a numeric index 437 specified, then we're doing numeric indexing into args. */ 438 using_numeric_index = field_name_is_empty || *first_idx != -1; 439 440 /* We always get here exactly one time for each field we're 441 processing. And we get here in field order (counting by left 442 braces). So this is the perfect place to handle automatic field 443 numbering if the field name is omitted. */ 444 445 /* Check if we need to do the auto-numbering. It's not needed if 446 we're called from string.Format routines, because it's handled 447 in that class by itself. */ 448 if (auto_number) { 449 /* Initialize our auto numbering state if this is the first 450 time we're either auto-numbering or manually numbering. */ 451 if (auto_number->an_state == ANS_INIT && using_numeric_index) 452 auto_number->an_state = field_name_is_empty ? 453 ANS_AUTO : ANS_MANUAL; 454 455 /* Make sure our state is consistent with what we're doing 456 this time through. Only check if we're using a numeric 457 index. */ 458 if (using_numeric_index) 459 if (autonumber_state_error(auto_number->an_state, 460 field_name_is_empty)) 461 return 0; 462 /* Zero length field means we want to do auto-numbering of the 463 fields. */ 464 if (field_name_is_empty) 465 *first_idx = (auto_number->an_field_number)++; 466 } 467 468 return 1; 469 } 470 471 472 /* 473 get_field_object returns the object inside {}, before the 474 format_spec. It handles getindex and getattr lookups and consumes 475 the entire input string. 476 */ 477 static PyObject * 478 get_field_object(SubString *input, PyObject *args, PyObject *kwargs, 479 AutoNumber *auto_number) 480 { 481 PyObject *obj = NULL; 482 int ok; 483 int is_attribute; 484 SubString name; 485 SubString first; 486 Py_ssize_t index; 487 FieldNameIterator rest; 488 489 if (!field_name_split(input->ptr, input->end - input->ptr, &first, 490 &index, &rest, auto_number)) { 491 goto error; 492 } 493 494 if (index == -1) { 495 /* look up in kwargs */ 496 PyObject *key = SubString_new_object(&first); 497 if (key == NULL) 498 goto error; 499 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { 500 PyErr_SetObject(PyExc_KeyError, key); 501 Py_DECREF(key); 502 goto error; 503 } 504 Py_DECREF(key); 505 Py_INCREF(obj); 506 } 507 else { 508 /* look up in args */ 509 obj = PySequence_GetItem(args, index); 510 if (obj == NULL) 511 goto error; 512 } 513 514 /* iterate over the rest of the field_name */ 515 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, 516 &name)) == 2) { 517 PyObject *tmp; 518 519 if (is_attribute) 520 /* getattr lookup "." */ 521 tmp = getattr(obj, &name); 522 else 523 /* getitem lookup "[]" */ 524 if (index == -1) 525 tmp = getitem_str(obj, &name); 526 else 527 if (PySequence_Check(obj)) 528 tmp = getitem_sequence(obj, index); 529 else 530 /* not a sequence */ 531 tmp = getitem_idx(obj, index); 532 if (tmp == NULL) 533 goto error; 534 535 /* assign to obj */ 536 Py_DECREF(obj); 537 obj = tmp; 538 } 539 /* end of iterator, this is the non-error case */ 540 if (ok == 1) 541 return obj; 542 error: 543 Py_XDECREF(obj); 544 return NULL; 545 } 546 547 /************************************************************************/ 548 /***************** Field rendering functions **************************/ 549 /************************************************************************/ 550 551 /* 552 render_field() is the main function in this section. It takes the 553 field object and field specification string generated by 554 get_field_and_spec, and renders the field into the output string. 555 556 render_field calls fieldobj.__format__(format_spec) method, and 557 appends to the output. 558 */ 559 static int 560 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) 561 { 562 int ok = 0; 563 PyObject *result = NULL; 564 PyObject *format_spec_object = NULL; 565 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL; 566 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ? 567 format_spec->ptr : NULL; 568 Py_ssize_t format_spec_len = format_spec->ptr ? 569 format_spec->end - format_spec->ptr : 0; 570 571 /* If we know the type exactly, skip the lookup of __format__ and just 572 call the formatter directly. */ 573 #if STRINGLIB_IS_UNICODE 574 if (PyUnicode_CheckExact(fieldobj)) 575 formatter = _PyUnicode_FormatAdvanced; 576 /* Unfortunately, there's a problem with checking for int, long, 577 and float here. If we're being included as unicode, their 578 formatters expect string format_spec args. For now, just skip 579 this optimization for unicode. This could be fixed, but it's a 580 hassle. */ 581 #else 582 if (PyString_CheckExact(fieldobj)) 583 formatter = _PyBytes_FormatAdvanced; 584 else if (PyInt_CheckExact(fieldobj)) 585 formatter =_PyInt_FormatAdvanced; 586 else if (PyLong_CheckExact(fieldobj)) 587 formatter =_PyLong_FormatAdvanced; 588 else if (PyFloat_CheckExact(fieldobj)) 589 formatter = _PyFloat_FormatAdvanced; 590 #endif 591 592 if (formatter) { 593 /* we know exactly which formatter will be called when __format__ is 594 looked up, so call it directly, instead. */ 595 result = formatter(fieldobj, format_spec_start, format_spec_len); 596 } 597 else { 598 /* We need to create an object out of the pointers we have, because 599 __format__ takes a string/unicode object for format_spec. */ 600 format_spec_object = STRINGLIB_NEW(format_spec_start, 601 format_spec_len); 602 if (format_spec_object == NULL) 603 goto done; 604 605 result = PyObject_Format(fieldobj, format_spec_object); 606 } 607 if (result == NULL) 608 goto done; 609 610 #if PY_VERSION_HEX >= 0x03000000 611 assert(PyUnicode_Check(result)); 612 #else 613 assert(PyString_Check(result) || PyUnicode_Check(result)); 614 615 /* Convert result to our type. We could be str, and result could 616 be unicode */ 617 { 618 PyObject *tmp = STRINGLIB_TOSTR(result); 619 if (tmp == NULL) 620 goto done; 621 Py_DECREF(result); 622 result = tmp; 623 } 624 #endif 625 626 ok = output_data(output, 627 STRINGLIB_STR(result), STRINGLIB_LEN(result)); 628 done: 629 Py_XDECREF(format_spec_object); 630 Py_XDECREF(result); 631 return ok; 632 } 633 634 static int 635 parse_field(SubString *str, SubString *field_name, SubString *format_spec, 636 STRINGLIB_CHAR *conversion) 637 { 638 /* Note this function works if the field name is zero length, 639 which is good. Zero length field names are handled later, in 640 field_name_split. */ 641 642 STRINGLIB_CHAR c = 0; 643 644 /* initialize these, as they may be empty */ 645 *conversion = '\0'; 646 SubString_init(format_spec, NULL, 0); 647 648 /* Search for the field name. it's terminated by the end of 649 the string, or a ':' or '!' */ 650 field_name->ptr = str->ptr; 651 while (str->ptr < str->end) { 652 switch (c = *(str->ptr++)) { 653 case ':': 654 case '!': 655 break; 656 default: 657 continue; 658 } 659 break; 660 } 661 662 if (c == '!' || c == ':') { 663 /* we have a format specifier and/or a conversion */ 664 /* don't include the last character */ 665 field_name->end = str->ptr-1; 666 667 /* the format specifier is the rest of the string */ 668 format_spec->ptr = str->ptr; 669 format_spec->end = str->end; 670 671 /* see if there's a conversion specifier */ 672 if (c == '!') { 673 /* there must be another character present */ 674 if (format_spec->ptr >= format_spec->end) { 675 PyErr_SetString(PyExc_ValueError, 676 "end of format while looking for conversion " 677 "specifier"); 678 return 0; 679 } 680 *conversion = *(format_spec->ptr++); 681 682 /* if there is another character, it must be a colon */ 683 if (format_spec->ptr < format_spec->end) { 684 c = *(format_spec->ptr++); 685 if (c != ':') { 686 PyErr_SetString(PyExc_ValueError, 687 "expected ':' after format specifier"); 688 return 0; 689 } 690 } 691 } 692 } 693 else 694 /* end of string, there's no format_spec or conversion */ 695 field_name->end = str->ptr; 696 697 return 1; 698 } 699 700 /************************************************************************/ 701 /******* Output string allocation and escape-to-markup processing ******/ 702 /************************************************************************/ 703 704 /* MarkupIterator breaks the string into pieces of either literal 705 text, or things inside {} that need to be marked up. it is 706 designed to make it easy to wrap a Python iterator around it, for 707 use with the Formatter class */ 708 709 typedef struct { 710 SubString str; 711 } MarkupIterator; 712 713 static int 714 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) 715 { 716 SubString_init(&self->str, ptr, len); 717 return 1; 718 } 719 720 /* returns 0 on error, 1 on non-error termination, and 2 if it got a 721 string (or something to be expanded) */ 722 static int 723 MarkupIterator_next(MarkupIterator *self, SubString *literal, 724 int *field_present, SubString *field_name, 725 SubString *format_spec, STRINGLIB_CHAR *conversion, 726 int *format_spec_needs_expanding) 727 { 728 int at_end; 729 STRINGLIB_CHAR c = 0; 730 STRINGLIB_CHAR *start; 731 int count; 732 Py_ssize_t len; 733 int markup_follows = 0; 734 735 /* initialize all of the output variables */ 736 SubString_init(literal, NULL, 0); 737 SubString_init(field_name, NULL, 0); 738 SubString_init(format_spec, NULL, 0); 739 *conversion = '\0'; 740 *format_spec_needs_expanding = 0; 741 *field_present = 0; 742 743 /* No more input, end of iterator. This is the normal exit 744 path. */ 745 if (self->str.ptr >= self->str.end) 746 return 1; 747 748 start = self->str.ptr; 749 750 /* First read any literal text. Read until the end of string, an 751 escaped '{' or '}', or an unescaped '{'. In order to never 752 allocate memory and so I can just pass pointers around, if 753 there's an escaped '{' or '}' then we'll return the literal 754 including the brace, but no format object. The next time 755 through, we'll return the rest of the literal, skipping past 756 the second consecutive brace. */ 757 while (self->str.ptr < self->str.end) { 758 switch (c = *(self->str.ptr++)) { 759 case '{': 760 case '}': 761 markup_follows = 1; 762 break; 763 default: 764 continue; 765 } 766 break; 767 } 768 769 at_end = self->str.ptr >= self->str.end; 770 len = self->str.ptr - start; 771 772 if ((c == '}') && (at_end || (c != *self->str.ptr))) { 773 PyErr_SetString(PyExc_ValueError, "Single '}' encountered " 774 "in format string"); 775 return 0; 776 } 777 if (at_end && c == '{') { 778 PyErr_SetString(PyExc_ValueError, "Single '{' encountered " 779 "in format string"); 780 return 0; 781 } 782 if (!at_end) { 783 if (c == *self->str.ptr) { 784 /* escaped } or {, skip it in the input. there is no 785 markup object following us, just this literal text */ 786 self->str.ptr++; 787 markup_follows = 0; 788 } 789 else 790 len--; 791 } 792 793 /* record the literal text */ 794 literal->ptr = start; 795 literal->end = start + len; 796 797 if (!markup_follows) 798 return 2; 799 800 /* this is markup, find the end of the string by counting nested 801 braces. note that this prohibits escaped braces, so that 802 format_specs cannot have braces in them. */ 803 *field_present = 1; 804 count = 1; 805 806 start = self->str.ptr; 807 808 /* we know we can't have a zero length string, so don't worry 809 about that case */ 810 while (self->str.ptr < self->str.end) { 811 switch (c = *(self->str.ptr++)) { 812 case '{': 813 /* the format spec needs to be recursively expanded. 814 this is an optimization, and not strictly needed */ 815 *format_spec_needs_expanding = 1; 816 count++; 817 break; 818 case '}': 819 count--; 820 if (count <= 0) { 821 /* we're done. parse and get out */ 822 SubString s; 823 824 SubString_init(&s, start, self->str.ptr - 1 - start); 825 if (parse_field(&s, field_name, format_spec, conversion) == 0) 826 return 0; 827 828 /* success */ 829 return 2; 830 } 831 break; 832 } 833 } 834 835 /* end of string while searching for matching '}' */ 836 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); 837 return 0; 838 } 839 840 841 /* do the !r or !s conversion on obj */ 842 static PyObject * 843 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) 844 { 845 /* XXX in pre-3.0, do we need to convert this to unicode, since it 846 might have returned a string? */ 847 switch (conversion) { 848 case 'r': 849 return PyObject_Repr(obj); 850 case 's': 851 return STRINGLIB_TOSTR(obj); 852 default: 853 if (conversion > 32 && conversion < 127) { 854 /* It's the ASCII subrange; casting to char is safe 855 (assuming the execution character set is an ASCII 856 superset). */ 857 PyErr_Format(PyExc_ValueError, 858 "Unknown conversion specifier %c", 859 (char)conversion); 860 } else 861 PyErr_Format(PyExc_ValueError, 862 "Unknown conversion specifier \\x%x", 863 (unsigned int)conversion); 864 return NULL; 865 } 866 } 867 868 /* given: 869 870 {field_name!conversion:format_spec} 871 872 compute the result and write it to output. 873 format_spec_needs_expanding is an optimization. if it's false, 874 just output the string directly, otherwise recursively expand the 875 format_spec string. 876 877 field_name is allowed to be zero length, in which case we 878 are doing auto field numbering. 879 */ 880 881 static int 882 output_markup(SubString *field_name, SubString *format_spec, 883 int format_spec_needs_expanding, STRINGLIB_CHAR conversion, 884 OutputString *output, PyObject *args, PyObject *kwargs, 885 int recursion_depth, AutoNumber *auto_number) 886 { 887 PyObject *tmp = NULL; 888 PyObject *fieldobj = NULL; 889 SubString expanded_format_spec; 890 SubString *actual_format_spec; 891 int result = 0; 892 893 /* convert field_name to an object */ 894 fieldobj = get_field_object(field_name, args, kwargs, auto_number); 895 if (fieldobj == NULL) 896 goto done; 897 898 if (conversion != '\0') { 899 tmp = do_conversion(fieldobj, conversion); 900 if (tmp == NULL) 901 goto done; 902 903 /* do the assignment, transferring ownership: fieldobj = tmp */ 904 Py_DECREF(fieldobj); 905 fieldobj = tmp; 906 tmp = NULL; 907 } 908 909 /* if needed, recurively compute the format_spec */ 910 if (format_spec_needs_expanding) { 911 tmp = build_string(format_spec, args, kwargs, recursion_depth-1, 912 auto_number); 913 if (tmp == NULL) 914 goto done; 915 916 /* note that in the case we're expanding the format string, 917 tmp must be kept around until after the call to 918 render_field. */ 919 SubString_init(&expanded_format_spec, 920 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp)); 921 actual_format_spec = &expanded_format_spec; 922 } 923 else 924 actual_format_spec = format_spec; 925 926 if (render_field(fieldobj, actual_format_spec, output) == 0) 927 goto done; 928 929 result = 1; 930 931 done: 932 Py_XDECREF(fieldobj); 933 Py_XDECREF(tmp); 934 935 return result; 936 } 937 938 /* 939 do_markup is the top-level loop for the format() method. It 940 searches through the format string for escapes to markup codes, and 941 calls other functions to move non-markup text to the output, 942 and to perform the markup to the output. 943 */ 944 static int 945 do_markup(SubString *input, PyObject *args, PyObject *kwargs, 946 OutputString *output, int recursion_depth, AutoNumber *auto_number) 947 { 948 MarkupIterator iter; 949 int format_spec_needs_expanding; 950 int result; 951 int field_present; 952 SubString literal; 953 SubString field_name; 954 SubString format_spec; 955 STRINGLIB_CHAR conversion; 956 957 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); 958 while ((result = MarkupIterator_next(&iter, &literal, &field_present, 959 &field_name, &format_spec, 960 &conversion, 961 &format_spec_needs_expanding)) == 2) { 962 if (!output_data(output, literal.ptr, literal.end - literal.ptr)) 963 return 0; 964 if (field_present) 965 if (!output_markup(&field_name, &format_spec, 966 format_spec_needs_expanding, conversion, output, 967 args, kwargs, recursion_depth, auto_number)) 968 return 0; 969 } 970 return result; 971 } 972 973 974 /* 975 build_string allocates the output string and then 976 calls do_markup to do the heavy lifting. 977 */ 978 static PyObject * 979 build_string(SubString *input, PyObject *args, PyObject *kwargs, 980 int recursion_depth, AutoNumber *auto_number) 981 { 982 OutputString output; 983 PyObject *result = NULL; 984 Py_ssize_t count; 985 986 output.obj = NULL; /* needed so cleanup code always works */ 987 988 /* check the recursion level */ 989 if (recursion_depth <= 0) { 990 PyErr_SetString(PyExc_ValueError, 991 "Max string recursion exceeded"); 992 goto done; 993 } 994 995 /* initial size is the length of the format string, plus the size 996 increment. seems like a reasonable default */ 997 if (!output_initialize(&output, 998 input->end - input->ptr + 999 INITIAL_SIZE_INCREMENT)) 1000 goto done; 1001 1002 if (!do_markup(input, args, kwargs, &output, recursion_depth, 1003 auto_number)) { 1004 goto done; 1005 } 1006 1007 count = output.ptr - STRINGLIB_STR(output.obj); 1008 if (STRINGLIB_RESIZE(&output.obj, count) < 0) { 1009 goto done; 1010 } 1011 1012 /* transfer ownership to result */ 1013 result = output.obj; 1014 output.obj = NULL; 1015 1016 done: 1017 Py_XDECREF(output.obj); 1018 return result; 1019 } 1020 1021 /************************************************************************/ 1022 /*********** main routine ***********************************************/ 1023 /************************************************************************/ 1024 1025 /* this is the main entry point */ 1026 static PyObject * 1027 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) 1028 { 1029 SubString input; 1030 1031 /* PEP 3101 says only 2 levels, so that 1032 "{0:{1}}".format('abc', 's') # works 1033 "{0:{1:{2}}}".format('abc', 's', '') # fails 1034 */ 1035 int recursion_depth = 2; 1036 1037 AutoNumber auto_number; 1038 1039 AutoNumber_Init(&auto_number); 1040 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self)); 1041 return build_string(&input, args, kwargs, recursion_depth, &auto_number); 1042 } 1043 1044 1045 1046 /************************************************************************/ 1047 /*********** formatteriterator ******************************************/ 1048 /************************************************************************/ 1049 1050 /* This is used to implement string.Formatter.vparse(). It exists so 1051 Formatter can share code with the built in unicode.format() method. 1052 It's really just a wrapper around MarkupIterator that is callable 1053 from Python. */ 1054 1055 typedef struct { 1056 PyObject_HEAD 1057 1058 STRINGLIB_OBJECT *str; 1059 1060 MarkupIterator it_markup; 1061 } formatteriterobject; 1062 1063 static void 1064 formatteriter_dealloc(formatteriterobject *it) 1065 { 1066 Py_XDECREF(it->str); 1067 PyObject_FREE(it); 1068 } 1069 1070 /* returns a tuple: 1071 (literal, field_name, format_spec, conversion) 1072 1073 literal is any literal text to output. might be zero length 1074 field_name is the string before the ':'. might be None 1075 format_spec is the string after the ':'. mibht be None 1076 conversion is either None, or the string after the '!' 1077 */ 1078 static PyObject * 1079 formatteriter_next(formatteriterobject *it) 1080 { 1081 SubString literal; 1082 SubString field_name; 1083 SubString format_spec; 1084 STRINGLIB_CHAR conversion; 1085 int format_spec_needs_expanding; 1086 int field_present; 1087 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, 1088 &field_name, &format_spec, &conversion, 1089 &format_spec_needs_expanding); 1090 1091 /* all of the SubString objects point into it->str, so no 1092 memory management needs to be done on them */ 1093 assert(0 <= result && result <= 2); 1094 if (result == 0 || result == 1) 1095 /* if 0, error has already been set, if 1, iterator is empty */ 1096 return NULL; 1097 else { 1098 PyObject *literal_str = NULL; 1099 PyObject *field_name_str = NULL; 1100 PyObject *format_spec_str = NULL; 1101 PyObject *conversion_str = NULL; 1102 PyObject *tuple = NULL; 1103 1104 literal_str = SubString_new_object(&literal); 1105 if (literal_str == NULL) 1106 goto done; 1107 1108 field_name_str = SubString_new_object(&field_name); 1109 if (field_name_str == NULL) 1110 goto done; 1111 1112 /* if field_name is non-zero length, return a string for 1113 format_spec (even if zero length), else return None */ 1114 format_spec_str = (field_present ? 1115 SubString_new_object_or_empty : 1116 SubString_new_object)(&format_spec); 1117 if (format_spec_str == NULL) 1118 goto done; 1119 1120 /* if the conversion is not specified, return a None, 1121 otherwise create a one length string with the conversion 1122 character */ 1123 if (conversion == '\0') { 1124 conversion_str = Py_None; 1125 Py_INCREF(conversion_str); 1126 } 1127 else 1128 conversion_str = STRINGLIB_NEW(&conversion, 1); 1129 if (conversion_str == NULL) 1130 goto done; 1131 1132 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, 1133 conversion_str); 1134 done: 1135 Py_XDECREF(literal_str); 1136 Py_XDECREF(field_name_str); 1137 Py_XDECREF(format_spec_str); 1138 Py_XDECREF(conversion_str); 1139 return tuple; 1140 } 1141 } 1142 1143 static PyMethodDef formatteriter_methods[] = { 1144 {NULL, NULL} /* sentinel */ 1145 }; 1146 1147 static PyTypeObject PyFormatterIter_Type = { 1148 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1149 "formatteriterator", /* tp_name */ 1150 sizeof(formatteriterobject), /* tp_basicsize */ 1151 0, /* tp_itemsize */ 1152 /* methods */ 1153 (destructor)formatteriter_dealloc, /* tp_dealloc */ 1154 0, /* tp_print */ 1155 0, /* tp_getattr */ 1156 0, /* tp_setattr */ 1157 0, /* tp_compare */ 1158 0, /* tp_repr */ 1159 0, /* tp_as_number */ 1160 0, /* tp_as_sequence */ 1161 0, /* tp_as_mapping */ 1162 0, /* tp_hash */ 1163 0, /* tp_call */ 1164 0, /* tp_str */ 1165 PyObject_GenericGetAttr, /* tp_getattro */ 1166 0, /* tp_setattro */ 1167 0, /* tp_as_buffer */ 1168 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1169 0, /* tp_doc */ 1170 0, /* tp_traverse */ 1171 0, /* tp_clear */ 1172 0, /* tp_richcompare */ 1173 0, /* tp_weaklistoffset */ 1174 PyObject_SelfIter, /* tp_iter */ 1175 (iternextfunc)formatteriter_next, /* tp_iternext */ 1176 formatteriter_methods, /* tp_methods */ 1177 0, 1178 }; 1179 1180 /* unicode_formatter_parser is used to implement 1181 string.Formatter.vformat. it parses a string and returns tuples 1182 describing the parsed elements. It's a wrapper around 1183 stringlib/string_format.h's MarkupIterator */ 1184 static PyObject * 1185 formatter_parser(STRINGLIB_OBJECT *self) 1186 { 1187 formatteriterobject *it; 1188 1189 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); 1190 if (it == NULL) 1191 return NULL; 1192 1193 /* take ownership, give the object to the iterator */ 1194 Py_INCREF(self); 1195 it->str = self; 1196 1197 /* initialize the contained MarkupIterator */ 1198 MarkupIterator_init(&it->it_markup, 1199 STRINGLIB_STR(self), 1200 STRINGLIB_LEN(self)); 1201 1202 return (PyObject *)it; 1203 } 1204 1205 1206 /************************************************************************/ 1207 /*********** fieldnameiterator ******************************************/ 1208 /************************************************************************/ 1209 1210 1211 /* This is used to implement string.Formatter.vparse(). It parses the 1212 field name into attribute and item values. It's a Python-callable 1213 wrapper around FieldNameIterator */ 1214 1215 typedef struct { 1216 PyObject_HEAD 1217 1218 STRINGLIB_OBJECT *str; 1219 1220 FieldNameIterator it_field; 1221 } fieldnameiterobject; 1222 1223 static void 1224 fieldnameiter_dealloc(fieldnameiterobject *it) 1225 { 1226 Py_XDECREF(it->str); 1227 PyObject_FREE(it); 1228 } 1229 1230 /* returns a tuple: 1231 (is_attr, value) 1232 is_attr is true if we used attribute syntax (e.g., '.foo') 1233 false if we used index syntax (e.g., '[foo]') 1234 value is an integer or string 1235 */ 1236 static PyObject * 1237 fieldnameiter_next(fieldnameiterobject *it) 1238 { 1239 int result; 1240 int is_attr; 1241 Py_ssize_t idx; 1242 SubString name; 1243 1244 result = FieldNameIterator_next(&it->it_field, &is_attr, 1245 &idx, &name); 1246 if (result == 0 || result == 1) 1247 /* if 0, error has already been set, if 1, iterator is empty */ 1248 return NULL; 1249 else { 1250 PyObject* result = NULL; 1251 PyObject* is_attr_obj = NULL; 1252 PyObject* obj = NULL; 1253 1254 is_attr_obj = PyBool_FromLong(is_attr); 1255 if (is_attr_obj == NULL) 1256 goto done; 1257 1258 /* either an integer or a string */ 1259 if (idx != -1) 1260 obj = PyLong_FromSsize_t(idx); 1261 else 1262 obj = SubString_new_object(&name); 1263 if (obj == NULL) 1264 goto done; 1265 1266 /* return a tuple of values */ 1267 result = PyTuple_Pack(2, is_attr_obj, obj); 1268 1269 done: 1270 Py_XDECREF(is_attr_obj); 1271 Py_XDECREF(obj); 1272 return result; 1273 } 1274 } 1275 1276 static PyMethodDef fieldnameiter_methods[] = { 1277 {NULL, NULL} /* sentinel */ 1278 }; 1279 1280 static PyTypeObject PyFieldNameIter_Type = { 1281 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1282 "fieldnameiterator", /* tp_name */ 1283 sizeof(fieldnameiterobject), /* tp_basicsize */ 1284 0, /* tp_itemsize */ 1285 /* methods */ 1286 (destructor)fieldnameiter_dealloc, /* tp_dealloc */ 1287 0, /* tp_print */ 1288 0, /* tp_getattr */ 1289 0, /* tp_setattr */ 1290 0, /* tp_compare */ 1291 0, /* tp_repr */ 1292 0, /* tp_as_number */ 1293 0, /* tp_as_sequence */ 1294 0, /* tp_as_mapping */ 1295 0, /* tp_hash */ 1296 0, /* tp_call */ 1297 0, /* tp_str */ 1298 PyObject_GenericGetAttr, /* tp_getattro */ 1299 0, /* tp_setattro */ 1300 0, /* tp_as_buffer */ 1301 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1302 0, /* tp_doc */ 1303 0, /* tp_traverse */ 1304 0, /* tp_clear */ 1305 0, /* tp_richcompare */ 1306 0, /* tp_weaklistoffset */ 1307 PyObject_SelfIter, /* tp_iter */ 1308 (iternextfunc)fieldnameiter_next, /* tp_iternext */ 1309 fieldnameiter_methods, /* tp_methods */ 1310 0}; 1311 1312 /* unicode_formatter_field_name_split is used to implement 1313 string.Formatter.vformat. it takes an PEP 3101 "field name", and 1314 returns a tuple of (first, rest): "first", the part before the 1315 first '.' or '['; and "rest", an iterator for the rest of the field 1316 name. it's a wrapper around stringlib/string_format.h's 1317 field_name_split. The iterator it returns is a 1318 FieldNameIterator */ 1319 static PyObject * 1320 formatter_field_name_split(STRINGLIB_OBJECT *self) 1321 { 1322 SubString first; 1323 Py_ssize_t first_idx; 1324 fieldnameiterobject *it; 1325 1326 PyObject *first_obj = NULL; 1327 PyObject *result = NULL; 1328 1329 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); 1330 if (it == NULL) 1331 return NULL; 1332 1333 /* take ownership, give the object to the iterator. this is 1334 just to keep the field_name alive */ 1335 Py_INCREF(self); 1336 it->str = self; 1337 1338 /* Pass in auto_number = NULL. We'll return an empty string for 1339 first_obj in that case. */ 1340 if (!field_name_split(STRINGLIB_STR(self), 1341 STRINGLIB_LEN(self), 1342 &first, &first_idx, &it->it_field, NULL)) 1343 goto done; 1344 1345 /* first becomes an integer, if possible; else a string */ 1346 if (first_idx != -1) 1347 first_obj = PyLong_FromSsize_t(first_idx); 1348 else 1349 /* convert "first" into a string object */ 1350 first_obj = SubString_new_object(&first); 1351 if (first_obj == NULL) 1352 goto done; 1353 1354 /* return a tuple of values */ 1355 result = PyTuple_Pack(2, first_obj, it); 1356 1357 done: 1358 Py_XDECREF(it); 1359 Py_XDECREF(first_obj); 1360 return result; 1361 } 1362