1 /* 2 string_format.h -- implementation of string.format(). 3 4 It uses the Objects/stringlib conventions, so that it can be 5 compiled for both unicode and string objects. 6 */ 7 8 9 /* Defines for Python 2.6 compatibility */ 10 #if PY_VERSION_HEX < 0x03000000 11 #define PyLong_FromSsize_t _PyLong_FromSsize_t 12 #endif 13 14 /* Defines for more efficiently reallocating the string buffer */ 15 #define INITIAL_SIZE_INCREMENT 100 16 #define SIZE_MULTIPLIER 2 17 #define MAX_SIZE_INCREMENT 3200 18 19 20 /************************************************************************/ 21 /*********** Global data structures and forward declarations *********/ 22 /************************************************************************/ 23 24 /* 25 A SubString consists of the characters between two string or 26 unicode pointers. 27 */ 28 typedef struct { 29 STRINGLIB_CHAR *ptr; 30 STRINGLIB_CHAR *end; 31 } SubString; 32 33 34 typedef enum { 35 ANS_INIT, 36 ANS_AUTO, 37 ANS_MANUAL 38 } AutoNumberState; /* Keep track if we're auto-numbering fields */ 39 40 /* Keeps track of our auto-numbering state, and which number field we're on */ 41 typedef struct { 42 AutoNumberState an_state; 43 int an_field_number; 44 } AutoNumber; 45 46 47 /* forward declaration for recursion */ 48 static PyObject * 49 build_string(SubString *input, PyObject *args, PyObject *kwargs, 50 int recursion_depth, AutoNumber *auto_number); 51 52 53 54 /************************************************************************/ 55 /************************** Utility functions ************************/ 56 /************************************************************************/ 57 58 static void 59 AutoNumber_Init(AutoNumber *auto_number) 60 { 61 auto_number->an_state = ANS_INIT; 62 auto_number->an_field_number = 0; 63 } 64 65 /* fill in a SubString from a pointer and length */ 66 Py_LOCAL_INLINE(void) 67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) 68 { 69 str->ptr = p; 70 if (p == NULL) 71 str->end = NULL; 72 else 73 str->end = str->ptr + len; 74 } 75 76 /* return a new string. if str->ptr is NULL, return None */ 77 Py_LOCAL_INLINE(PyObject *) 78 SubString_new_object(SubString *str) 79 { 80 if (str->ptr == NULL) { 81 Py_INCREF(Py_None); 82 return Py_None; 83 } 84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr); 85 } 86 87 /* return a new string. if str->ptr is NULL, return None */ 88 Py_LOCAL_INLINE(PyObject *) 89 SubString_new_object_or_empty(SubString *str) 90 { 91 if (str->ptr == NULL) { 92 return STRINGLIB_NEW(NULL, 0); 93 } 94 return STRINGLIB_NEW(str->ptr, str->end - str->ptr); 95 } 96 97 /* Return 1 if an error has been detected switching between automatic 98 field numbering and manual field specification, else return 0. Set 99 ValueError on error. */ 100 static int 101 autonumber_state_error(AutoNumberState state, int field_name_is_empty) 102 { 103 if (state == ANS_MANUAL) { 104 if (field_name_is_empty) { 105 PyErr_SetString(PyExc_ValueError, "cannot switch from " 106 "manual field specification to " 107 "automatic field numbering"); 108 return 1; 109 } 110 } 111 else { 112 if (!field_name_is_empty) { 113 PyErr_SetString(PyExc_ValueError, "cannot switch from " 114 "automatic field numbering to " 115 "manual field specification"); 116 return 1; 117 } 118 } 119 return 0; 120 } 121 122 123 /************************************************************************/ 124 /*********** Output string management functions ****************/ 125 /************************************************************************/ 126 127 typedef struct { 128 STRINGLIB_CHAR *ptr; 129 STRINGLIB_CHAR *end; 130 PyObject *obj; 131 Py_ssize_t size_increment; 132 } OutputString; 133 134 /* initialize an OutputString object, reserving size characters */ 135 static int 136 output_initialize(OutputString *output, Py_ssize_t size) 137 { 138 output->obj = STRINGLIB_NEW(NULL, size); 139 if (output->obj == NULL) 140 return 0; 141 142 output->ptr = STRINGLIB_STR(output->obj); 143 output->end = STRINGLIB_LEN(output->obj) + output->ptr; 144 output->size_increment = INITIAL_SIZE_INCREMENT; 145 146 return 1; 147 } 148 149 /* 150 output_extend reallocates the output string buffer. 151 It returns a status: 0 for a failed reallocation, 152 1 for success. 153 */ 154 155 static int 156 output_extend(OutputString *output, Py_ssize_t count) 157 { 158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj); 159 Py_ssize_t curlen = output->ptr - startptr; 160 Py_ssize_t maxlen = curlen + count + output->size_increment; 161 162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0) 163 return 0; 164 startptr = STRINGLIB_STR(output->obj); 165 output->ptr = startptr + curlen; 166 output->end = startptr + maxlen; 167 if (output->size_increment < MAX_SIZE_INCREMENT) 168 output->size_increment *= SIZE_MULTIPLIER; 169 return 1; 170 } 171 172 /* 173 output_data dumps characters into our output string 174 buffer. 175 176 In some cases, it has to reallocate the string. 177 178 It returns a status: 0 for a failed reallocation, 179 1 for success. 180 */ 181 static int 182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count) 183 { 184 if ((count > output->end - output->ptr) && !output_extend(output, count)) 185 return 0; 186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR)); 187 output->ptr += count; 188 return 1; 189 } 190 191 /************************************************************************/ 192 /*********** Format string parsing -- integers and identifiers *********/ 193 /************************************************************************/ 194 195 static Py_ssize_t 196 get_integer(const SubString *str) 197 { 198 Py_ssize_t accumulator = 0; 199 Py_ssize_t digitval; 200 Py_ssize_t oldaccumulator; 201 STRINGLIB_CHAR *p; 202 203 /* empty string is an error */ 204 if (str->ptr >= str->end) 205 return -1; 206 207 for (p = str->ptr; p < str->end; p++) { 208 digitval = STRINGLIB_TODECIMAL(*p); 209 if (digitval < 0) 210 return -1; 211 /* 212 This trick was copied from old Unicode format code. It's cute, 213 but would really suck on an old machine with a slow divide 214 implementation. Fortunately, in the normal case we do not 215 expect too many digits. 216 */ 217 oldaccumulator = accumulator; 218 accumulator *= 10; 219 if ((accumulator+10)/10 != oldaccumulator+1) { 220 PyErr_Format(PyExc_ValueError, 221 "Too many decimal digits in format string"); 222 return -1; 223 } 224 accumulator += digitval; 225 } 226 return accumulator; 227 } 228 229 /************************************************************************/ 230 /******** Functions to get field objects and specification strings ******/ 231 /************************************************************************/ 232 233 /* do the equivalent of obj.name */ 234 static PyObject * 235 getattr(PyObject *obj, SubString *name) 236 { 237 PyObject *newobj; 238 PyObject *str = SubString_new_object(name); 239 if (str == NULL) 240 return NULL; 241 newobj = PyObject_GetAttr(obj, str); 242 Py_DECREF(str); 243 return newobj; 244 } 245 246 /* do the equivalent of obj[idx], where obj is a sequence */ 247 static PyObject * 248 getitem_sequence(PyObject *obj, Py_ssize_t idx) 249 { 250 return PySequence_GetItem(obj, idx); 251 } 252 253 /* do the equivalent of obj[idx], where obj is not a sequence */ 254 static PyObject * 255 getitem_idx(PyObject *obj, Py_ssize_t idx) 256 { 257 PyObject *newobj; 258 PyObject *idx_obj = PyLong_FromSsize_t(idx); 259 if (idx_obj == NULL) 260 return NULL; 261 newobj = PyObject_GetItem(obj, idx_obj); 262 Py_DECREF(idx_obj); 263 return newobj; 264 } 265 266 /* do the equivalent of obj[name] */ 267 static PyObject * 268 getitem_str(PyObject *obj, SubString *name) 269 { 270 PyObject *newobj; 271 PyObject *str = SubString_new_object(name); 272 if (str == NULL) 273 return NULL; 274 newobj = PyObject_GetItem(obj, str); 275 Py_DECREF(str); 276 return newobj; 277 } 278 279 typedef struct { 280 /* the entire string we're parsing. we assume that someone else 281 is managing its lifetime, and that it will exist for the 282 lifetime of the iterator. can be empty */ 283 SubString str; 284 285 /* pointer to where we are inside field_name */ 286 STRINGLIB_CHAR *ptr; 287 } FieldNameIterator; 288 289 290 static int 291 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr, 292 Py_ssize_t len) 293 { 294 SubString_init(&self->str, ptr, len); 295 self->ptr = self->str.ptr; 296 return 1; 297 } 298 299 static int 300 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name) 301 { 302 STRINGLIB_CHAR c; 303 304 name->ptr = self->ptr; 305 306 /* return everything until '.' or '[' */ 307 while (self->ptr < self->str.end) { 308 switch (c = *self->ptr++) { 309 case '[': 310 case '.': 311 /* backup so that we this character will be seen next time */ 312 self->ptr--; 313 break; 314 default: 315 continue; 316 } 317 break; 318 } 319 /* end of string is okay */ 320 name->end = self->ptr; 321 return 1; 322 } 323 324 static int 325 _FieldNameIterator_item(FieldNameIterator *self, SubString *name) 326 { 327 int bracket_seen = 0; 328 STRINGLIB_CHAR c; 329 330 name->ptr = self->ptr; 331 332 /* return everything until ']' */ 333 while (self->ptr < self->str.end) { 334 switch (c = *self->ptr++) { 335 case ']': 336 bracket_seen = 1; 337 break; 338 default: 339 continue; 340 } 341 break; 342 } 343 /* make sure we ended with a ']' */ 344 if (!bracket_seen) { 345 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); 346 return 0; 347 } 348 349 /* end of string is okay */ 350 /* don't include the ']' */ 351 name->end = self->ptr-1; 352 return 1; 353 } 354 355 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ 356 static int 357 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, 358 Py_ssize_t *name_idx, SubString *name) 359 { 360 /* check at end of input */ 361 if (self->ptr >= self->str.end) 362 return 1; 363 364 switch (*self->ptr++) { 365 case '.': 366 *is_attribute = 1; 367 if (_FieldNameIterator_attr(self, name) == 0) 368 return 0; 369 *name_idx = -1; 370 break; 371 case '[': 372 *is_attribute = 0; 373 if (_FieldNameIterator_item(self, name) == 0) 374 return 0; 375 *name_idx = get_integer(name); 376 if (*name_idx == -1 && PyErr_Occurred()) 377 return 0; 378 break; 379 default: 380 /* Invalid character follows ']' */ 381 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " 382 "follow ']' in format field specifier"); 383 return 0; 384 } 385 386 /* empty string is an error */ 387 if (name->ptr == name->end) { 388 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); 389 return 0; 390 } 391 392 return 2; 393 } 394 395 396 /* input: field_name 397 output: 'first' points to the part before the first '[' or '.' 398 'first_idx' is -1 if 'first' is not an integer, otherwise 399 it's the value of first converted to an integer 400 'rest' is an iterator to return the rest 401 */ 402 static int 403 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, 404 Py_ssize_t *first_idx, FieldNameIterator *rest, 405 AutoNumber *auto_number) 406 { 407 STRINGLIB_CHAR c; 408 STRINGLIB_CHAR *p = ptr; 409 STRINGLIB_CHAR *end = ptr + len; 410 int field_name_is_empty; 411 int using_numeric_index; 412 413 /* find the part up until the first '.' or '[' */ 414 while (p < end) { 415 switch (c = *p++) { 416 case '[': 417 case '.': 418 /* backup so that we this character is available to the 419 "rest" iterator */ 420 p--; 421 break; 422 default: 423 continue; 424 } 425 break; 426 } 427 428 /* set up the return values */ 429 SubString_init(first, ptr, p - ptr); 430 FieldNameIterator_init(rest, p, end - p); 431 432 /* see if "first" is an integer, in which case it's used as an index */ 433 *first_idx = get_integer(first); 434 if (*first_idx == -1 && PyErr_Occurred()) 435 return 0; 436 437 field_name_is_empty = first->ptr >= first->end; 438 439 /* If the field name is omitted or if we have a numeric index 440 specified, then we're doing numeric indexing into args. */ 441 using_numeric_index = field_name_is_empty || *first_idx != -1; 442 443 /* We always get here exactly one time for each field we're 444 processing. And we get here in field order (counting by left 445 braces). So this is the perfect place to handle automatic field 446 numbering if the field name is omitted. */ 447 448 /* Check if we need to do the auto-numbering. It's not needed if 449 we're called from string.Format routines, because it's handled 450 in that class by itself. */ 451 if (auto_number) { 452 /* Initialize our auto numbering state if this is the first 453 time we're either auto-numbering or manually numbering. */ 454 if (auto_number->an_state == ANS_INIT && using_numeric_index) 455 auto_number->an_state = field_name_is_empty ? 456 ANS_AUTO : ANS_MANUAL; 457 458 /* Make sure our state is consistent with what we're doing 459 this time through. Only check if we're using a numeric 460 index. */ 461 if (using_numeric_index) 462 if (autonumber_state_error(auto_number->an_state, 463 field_name_is_empty)) 464 return 0; 465 /* Zero length field means we want to do auto-numbering of the 466 fields. */ 467 if (field_name_is_empty) 468 *first_idx = (auto_number->an_field_number)++; 469 } 470 471 return 1; 472 } 473 474 475 /* 476 get_field_object returns the object inside {}, before the 477 format_spec. It handles getindex and getattr lookups and consumes 478 the entire input string. 479 */ 480 static PyObject * 481 get_field_object(SubString *input, PyObject *args, PyObject *kwargs, 482 AutoNumber *auto_number) 483 { 484 PyObject *obj = NULL; 485 int ok; 486 int is_attribute; 487 SubString name; 488 SubString first; 489 Py_ssize_t index; 490 FieldNameIterator rest; 491 492 if (!field_name_split(input->ptr, input->end - input->ptr, &first, 493 &index, &rest, auto_number)) { 494 goto error; 495 } 496 497 if (index == -1) { 498 /* look up in kwargs */ 499 PyObject *key = SubString_new_object(&first); 500 if (key == NULL) 501 goto error; 502 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { 503 PyErr_SetObject(PyExc_KeyError, key); 504 Py_DECREF(key); 505 goto error; 506 } 507 Py_DECREF(key); 508 Py_INCREF(obj); 509 } 510 else { 511 /* look up in args */ 512 obj = PySequence_GetItem(args, index); 513 if (obj == NULL) 514 goto error; 515 } 516 517 /* iterate over the rest of the field_name */ 518 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, 519 &name)) == 2) { 520 PyObject *tmp; 521 522 if (is_attribute) 523 /* getattr lookup "." */ 524 tmp = getattr(obj, &name); 525 else 526 /* getitem lookup "[]" */ 527 if (index == -1) 528 tmp = getitem_str(obj, &name); 529 else 530 if (PySequence_Check(obj)) 531 tmp = getitem_sequence(obj, index); 532 else 533 /* not a sequence */ 534 tmp = getitem_idx(obj, index); 535 if (tmp == NULL) 536 goto error; 537 538 /* assign to obj */ 539 Py_DECREF(obj); 540 obj = tmp; 541 } 542 /* end of iterator, this is the non-error case */ 543 if (ok == 1) 544 return obj; 545 error: 546 Py_XDECREF(obj); 547 return NULL; 548 } 549 550 /************************************************************************/ 551 /***************** Field rendering functions **************************/ 552 /************************************************************************/ 553 554 /* 555 render_field() is the main function in this section. It takes the 556 field object and field specification string generated by 557 get_field_and_spec, and renders the field into the output string. 558 559 render_field calls fieldobj.__format__(format_spec) method, and 560 appends to the output. 561 */ 562 static int 563 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) 564 { 565 int ok = 0; 566 PyObject *result = NULL; 567 PyObject *format_spec_object = NULL; 568 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL; 569 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ? 570 format_spec->ptr : NULL; 571 Py_ssize_t format_spec_len = format_spec->ptr ? 572 format_spec->end - format_spec->ptr : 0; 573 574 /* If we know the type exactly, skip the lookup of __format__ and just 575 call the formatter directly. */ 576 #if STRINGLIB_IS_UNICODE 577 if (PyUnicode_CheckExact(fieldobj)) 578 formatter = _PyUnicode_FormatAdvanced; 579 /* Unfortunately, there's a problem with checking for int, long, 580 and float here. If we're being included as unicode, their 581 formatters expect string format_spec args. For now, just skip 582 this optimization for unicode. This could be fixed, but it's a 583 hassle. */ 584 #else 585 if (PyString_CheckExact(fieldobj)) 586 formatter = _PyBytes_FormatAdvanced; 587 else if (PyInt_CheckExact(fieldobj)) 588 formatter =_PyInt_FormatAdvanced; 589 else if (PyLong_CheckExact(fieldobj)) 590 formatter =_PyLong_FormatAdvanced; 591 else if (PyFloat_CheckExact(fieldobj)) 592 formatter = _PyFloat_FormatAdvanced; 593 #endif 594 595 if (formatter) { 596 /* we know exactly which formatter will be called when __format__ is 597 looked up, so call it directly, instead. */ 598 result = formatter(fieldobj, format_spec_start, format_spec_len); 599 } 600 else { 601 /* We need to create an object out of the pointers we have, because 602 __format__ takes a string/unicode object for format_spec. */ 603 format_spec_object = STRINGLIB_NEW(format_spec_start, 604 format_spec_len); 605 if (format_spec_object == NULL) 606 goto done; 607 608 result = PyObject_Format(fieldobj, format_spec_object); 609 } 610 if (result == NULL) 611 goto done; 612 613 #if PY_VERSION_HEX >= 0x03000000 614 assert(PyUnicode_Check(result)); 615 #else 616 assert(PyString_Check(result) || PyUnicode_Check(result)); 617 618 /* Convert result to our type. We could be str, and result could 619 be unicode */ 620 { 621 PyObject *tmp = STRINGLIB_TOSTR(result); 622 if (tmp == NULL) 623 goto done; 624 Py_DECREF(result); 625 result = tmp; 626 } 627 #endif 628 629 ok = output_data(output, 630 STRINGLIB_STR(result), STRINGLIB_LEN(result)); 631 done: 632 Py_XDECREF(format_spec_object); 633 Py_XDECREF(result); 634 return ok; 635 } 636 637 static int 638 parse_field(SubString *str, SubString *field_name, SubString *format_spec, 639 STRINGLIB_CHAR *conversion) 640 { 641 /* Note this function works if the field name is zero length, 642 which is good. Zero length field names are handled later, in 643 field_name_split. */ 644 645 STRINGLIB_CHAR c = 0; 646 647 /* initialize these, as they may be empty */ 648 *conversion = '\0'; 649 SubString_init(format_spec, NULL, 0); 650 651 /* Search for the field name. it's terminated by the end of 652 the string, or a ':' or '!' */ 653 field_name->ptr = str->ptr; 654 while (str->ptr < str->end) { 655 switch (c = *(str->ptr++)) { 656 case ':': 657 case '!': 658 break; 659 default: 660 continue; 661 } 662 break; 663 } 664 665 if (c == '!' || c == ':') { 666 /* we have a format specifier and/or a conversion */ 667 /* don't include the last character */ 668 field_name->end = str->ptr-1; 669 670 /* the format specifier is the rest of the string */ 671 format_spec->ptr = str->ptr; 672 format_spec->end = str->end; 673 674 /* see if there's a conversion specifier */ 675 if (c == '!') { 676 /* there must be another character present */ 677 if (format_spec->ptr >= format_spec->end) { 678 PyErr_SetString(PyExc_ValueError, 679 "end of format while looking for conversion " 680 "specifier"); 681 return 0; 682 } 683 *conversion = *(format_spec->ptr++); 684 685 /* if there is another character, it must be a colon */ 686 if (format_spec->ptr < format_spec->end) { 687 c = *(format_spec->ptr++); 688 if (c != ':') { 689 PyErr_SetString(PyExc_ValueError, 690 "expected ':' after format specifier"); 691 return 0; 692 } 693 } 694 } 695 } 696 else 697 /* end of string, there's no format_spec or conversion */ 698 field_name->end = str->ptr; 699 700 return 1; 701 } 702 703 /************************************************************************/ 704 /******* Output string allocation and escape-to-markup processing ******/ 705 /************************************************************************/ 706 707 /* MarkupIterator breaks the string into pieces of either literal 708 text, or things inside {} that need to be marked up. it is 709 designed to make it easy to wrap a Python iterator around it, for 710 use with the Formatter class */ 711 712 typedef struct { 713 SubString str; 714 } MarkupIterator; 715 716 static int 717 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) 718 { 719 SubString_init(&self->str, ptr, len); 720 return 1; 721 } 722 723 /* returns 0 on error, 1 on non-error termination, and 2 if it got a 724 string (or something to be expanded) */ 725 static int 726 MarkupIterator_next(MarkupIterator *self, SubString *literal, 727 int *field_present, SubString *field_name, 728 SubString *format_spec, STRINGLIB_CHAR *conversion, 729 int *format_spec_needs_expanding) 730 { 731 int at_end; 732 STRINGLIB_CHAR c = 0; 733 STRINGLIB_CHAR *start; 734 int count; 735 Py_ssize_t len; 736 int markup_follows = 0; 737 738 /* initialize all of the output variables */ 739 SubString_init(literal, NULL, 0); 740 SubString_init(field_name, NULL, 0); 741 SubString_init(format_spec, NULL, 0); 742 *conversion = '\0'; 743 *format_spec_needs_expanding = 0; 744 *field_present = 0; 745 746 /* No more input, end of iterator. This is the normal exit 747 path. */ 748 if (self->str.ptr >= self->str.end) 749 return 1; 750 751 start = self->str.ptr; 752 753 /* First read any literal text. Read until the end of string, an 754 escaped '{' or '}', or an unescaped '{'. In order to never 755 allocate memory and so I can just pass pointers around, if 756 there's an escaped '{' or '}' then we'll return the literal 757 including the brace, but no format object. The next time 758 through, we'll return the rest of the literal, skipping past 759 the second consecutive brace. */ 760 while (self->str.ptr < self->str.end) { 761 switch (c = *(self->str.ptr++)) { 762 case '{': 763 case '}': 764 markup_follows = 1; 765 break; 766 default: 767 continue; 768 } 769 break; 770 } 771 772 at_end = self->str.ptr >= self->str.end; 773 len = self->str.ptr - start; 774 775 if ((c == '}') && (at_end || (c != *self->str.ptr))) { 776 PyErr_SetString(PyExc_ValueError, "Single '}' encountered " 777 "in format string"); 778 return 0; 779 } 780 if (at_end && c == '{') { 781 PyErr_SetString(PyExc_ValueError, "Single '{' encountered " 782 "in format string"); 783 return 0; 784 } 785 if (!at_end) { 786 if (c == *self->str.ptr) { 787 /* escaped } or {, skip it in the input. there is no 788 markup object following us, just this literal text */ 789 self->str.ptr++; 790 markup_follows = 0; 791 } 792 else 793 len--; 794 } 795 796 /* record the literal text */ 797 literal->ptr = start; 798 literal->end = start + len; 799 800 if (!markup_follows) 801 return 2; 802 803 /* this is markup, find the end of the string by counting nested 804 braces. note that this prohibits escaped braces, so that 805 format_specs cannot have braces in them. */ 806 *field_present = 1; 807 count = 1; 808 809 start = self->str.ptr; 810 811 /* we know we can't have a zero length string, so don't worry 812 about that case */ 813 while (self->str.ptr < self->str.end) { 814 switch (c = *(self->str.ptr++)) { 815 case '{': 816 /* the format spec needs to be recursively expanded. 817 this is an optimization, and not strictly needed */ 818 *format_spec_needs_expanding = 1; 819 count++; 820 break; 821 case '}': 822 count--; 823 if (count <= 0) { 824 /* we're done. parse and get out */ 825 SubString s; 826 827 SubString_init(&s, start, self->str.ptr - 1 - start); 828 if (parse_field(&s, field_name, format_spec, conversion) == 0) 829 return 0; 830 831 /* success */ 832 return 2; 833 } 834 break; 835 } 836 } 837 838 /* end of string while searching for matching '}' */ 839 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); 840 return 0; 841 } 842 843 844 /* do the !r or !s conversion on obj */ 845 static PyObject * 846 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) 847 { 848 /* XXX in pre-3.0, do we need to convert this to unicode, since it 849 might have returned a string? */ 850 switch (conversion) { 851 case 'r': 852 return PyObject_Repr(obj); 853 case 's': 854 return STRINGLIB_TOSTR(obj); 855 default: 856 if (conversion > 32 && conversion < 127) { 857 /* It's the ASCII subrange; casting to char is safe 858 (assuming the execution character set is an ASCII 859 superset). */ 860 PyErr_Format(PyExc_ValueError, 861 "Unknown conversion specifier %c", 862 (char)conversion); 863 } else 864 PyErr_Format(PyExc_ValueError, 865 "Unknown conversion specifier \\x%x", 866 (unsigned int)conversion); 867 return NULL; 868 } 869 } 870 871 /* given: 872 873 {field_name!conversion:format_spec} 874 875 compute the result and write it to output. 876 format_spec_needs_expanding is an optimization. if it's false, 877 just output the string directly, otherwise recursively expand the 878 format_spec string. 879 880 field_name is allowed to be zero length, in which case we 881 are doing auto field numbering. 882 */ 883 884 static int 885 output_markup(SubString *field_name, SubString *format_spec, 886 int format_spec_needs_expanding, STRINGLIB_CHAR conversion, 887 OutputString *output, PyObject *args, PyObject *kwargs, 888 int recursion_depth, AutoNumber *auto_number) 889 { 890 PyObject *tmp = NULL; 891 PyObject *fieldobj = NULL; 892 SubString expanded_format_spec; 893 SubString *actual_format_spec; 894 int result = 0; 895 896 /* convert field_name to an object */ 897 fieldobj = get_field_object(field_name, args, kwargs, auto_number); 898 if (fieldobj == NULL) 899 goto done; 900 901 if (conversion != '\0') { 902 tmp = do_conversion(fieldobj, conversion); 903 if (tmp == NULL) 904 goto done; 905 906 /* do the assignment, transferring ownership: fieldobj = tmp */ 907 Py_DECREF(fieldobj); 908 fieldobj = tmp; 909 tmp = NULL; 910 } 911 912 /* if needed, recurively compute the format_spec */ 913 if (format_spec_needs_expanding) { 914 tmp = build_string(format_spec, args, kwargs, recursion_depth-1, 915 auto_number); 916 if (tmp == NULL) 917 goto done; 918 919 /* note that in the case we're expanding the format string, 920 tmp must be kept around until after the call to 921 render_field. */ 922 SubString_init(&expanded_format_spec, 923 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp)); 924 actual_format_spec = &expanded_format_spec; 925 } 926 else 927 actual_format_spec = format_spec; 928 929 if (render_field(fieldobj, actual_format_spec, output) == 0) 930 goto done; 931 932 result = 1; 933 934 done: 935 Py_XDECREF(fieldobj); 936 Py_XDECREF(tmp); 937 938 return result; 939 } 940 941 /* 942 do_markup is the top-level loop for the format() method. It 943 searches through the format string for escapes to markup codes, and 944 calls other functions to move non-markup text to the output, 945 and to perform the markup to the output. 946 */ 947 static int 948 do_markup(SubString *input, PyObject *args, PyObject *kwargs, 949 OutputString *output, int recursion_depth, AutoNumber *auto_number) 950 { 951 MarkupIterator iter; 952 int format_spec_needs_expanding; 953 int result; 954 int field_present; 955 SubString literal; 956 SubString field_name; 957 SubString format_spec; 958 STRINGLIB_CHAR conversion; 959 960 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); 961 while ((result = MarkupIterator_next(&iter, &literal, &field_present, 962 &field_name, &format_spec, 963 &conversion, 964 &format_spec_needs_expanding)) == 2) { 965 if (!output_data(output, literal.ptr, literal.end - literal.ptr)) 966 return 0; 967 if (field_present) 968 if (!output_markup(&field_name, &format_spec, 969 format_spec_needs_expanding, conversion, output, 970 args, kwargs, recursion_depth, auto_number)) 971 return 0; 972 } 973 return result; 974 } 975 976 977 /* 978 build_string allocates the output string and then 979 calls do_markup to do the heavy lifting. 980 */ 981 static PyObject * 982 build_string(SubString *input, PyObject *args, PyObject *kwargs, 983 int recursion_depth, AutoNumber *auto_number) 984 { 985 OutputString output; 986 PyObject *result = NULL; 987 Py_ssize_t count; 988 989 output.obj = NULL; /* needed so cleanup code always works */ 990 991 /* check the recursion level */ 992 if (recursion_depth <= 0) { 993 PyErr_SetString(PyExc_ValueError, 994 "Max string recursion exceeded"); 995 goto done; 996 } 997 998 /* initial size is the length of the format string, plus the size 999 increment. seems like a reasonable default */ 1000 if (!output_initialize(&output, 1001 input->end - input->ptr + 1002 INITIAL_SIZE_INCREMENT)) 1003 goto done; 1004 1005 if (!do_markup(input, args, kwargs, &output, recursion_depth, 1006 auto_number)) { 1007 goto done; 1008 } 1009 1010 count = output.ptr - STRINGLIB_STR(output.obj); 1011 if (STRINGLIB_RESIZE(&output.obj, count) < 0) { 1012 goto done; 1013 } 1014 1015 /* transfer ownership to result */ 1016 result = output.obj; 1017 output.obj = NULL; 1018 1019 done: 1020 Py_XDECREF(output.obj); 1021 return result; 1022 } 1023 1024 /************************************************************************/ 1025 /*********** main routine ***********************************************/ 1026 /************************************************************************/ 1027 1028 /* this is the main entry point */ 1029 static PyObject * 1030 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) 1031 { 1032 SubString input; 1033 1034 /* PEP 3101 says only 2 levels, so that 1035 "{0:{1}}".format('abc', 's') # works 1036 "{0:{1:{2}}}".format('abc', 's', '') # fails 1037 */ 1038 int recursion_depth = 2; 1039 1040 AutoNumber auto_number; 1041 1042 AutoNumber_Init(&auto_number); 1043 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self)); 1044 return build_string(&input, args, kwargs, recursion_depth, &auto_number); 1045 } 1046 1047 1048 1049 /************************************************************************/ 1050 /*********** formatteriterator ******************************************/ 1051 /************************************************************************/ 1052 1053 /* This is used to implement string.Formatter.vparse(). It exists so 1054 Formatter can share code with the built in unicode.format() method. 1055 It's really just a wrapper around MarkupIterator that is callable 1056 from Python. */ 1057 1058 typedef struct { 1059 PyObject_HEAD 1060 1061 STRINGLIB_OBJECT *str; 1062 1063 MarkupIterator it_markup; 1064 } formatteriterobject; 1065 1066 static void 1067 formatteriter_dealloc(formatteriterobject *it) 1068 { 1069 Py_XDECREF(it->str); 1070 PyObject_FREE(it); 1071 } 1072 1073 /* returns a tuple: 1074 (literal, field_name, format_spec, conversion) 1075 1076 literal is any literal text to output. might be zero length 1077 field_name is the string before the ':'. might be None 1078 format_spec is the string after the ':'. mibht be None 1079 conversion is either None, or the string after the '!' 1080 */ 1081 static PyObject * 1082 formatteriter_next(formatteriterobject *it) 1083 { 1084 SubString literal; 1085 SubString field_name; 1086 SubString format_spec; 1087 STRINGLIB_CHAR conversion; 1088 int format_spec_needs_expanding; 1089 int field_present; 1090 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, 1091 &field_name, &format_spec, &conversion, 1092 &format_spec_needs_expanding); 1093 1094 /* all of the SubString objects point into it->str, so no 1095 memory management needs to be done on them */ 1096 assert(0 <= result && result <= 2); 1097 if (result == 0 || result == 1) 1098 /* if 0, error has already been set, if 1, iterator is empty */ 1099 return NULL; 1100 else { 1101 PyObject *literal_str = NULL; 1102 PyObject *field_name_str = NULL; 1103 PyObject *format_spec_str = NULL; 1104 PyObject *conversion_str = NULL; 1105 PyObject *tuple = NULL; 1106 1107 literal_str = SubString_new_object(&literal); 1108 if (literal_str == NULL) 1109 goto done; 1110 1111 field_name_str = SubString_new_object(&field_name); 1112 if (field_name_str == NULL) 1113 goto done; 1114 1115 /* if field_name is non-zero length, return a string for 1116 format_spec (even if zero length), else return None */ 1117 format_spec_str = (field_present ? 1118 SubString_new_object_or_empty : 1119 SubString_new_object)(&format_spec); 1120 if (format_spec_str == NULL) 1121 goto done; 1122 1123 /* if the conversion is not specified, return a None, 1124 otherwise create a one length string with the conversion 1125 character */ 1126 if (conversion == '\0') { 1127 conversion_str = Py_None; 1128 Py_INCREF(conversion_str); 1129 } 1130 else 1131 conversion_str = STRINGLIB_NEW(&conversion, 1); 1132 if (conversion_str == NULL) 1133 goto done; 1134 1135 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, 1136 conversion_str); 1137 done: 1138 Py_XDECREF(literal_str); 1139 Py_XDECREF(field_name_str); 1140 Py_XDECREF(format_spec_str); 1141 Py_XDECREF(conversion_str); 1142 return tuple; 1143 } 1144 } 1145 1146 static PyMethodDef formatteriter_methods[] = { 1147 {NULL, NULL} /* sentinel */ 1148 }; 1149 1150 static PyTypeObject PyFormatterIter_Type = { 1151 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1152 "formatteriterator", /* tp_name */ 1153 sizeof(formatteriterobject), /* tp_basicsize */ 1154 0, /* tp_itemsize */ 1155 /* methods */ 1156 (destructor)formatteriter_dealloc, /* tp_dealloc */ 1157 0, /* tp_print */ 1158 0, /* tp_getattr */ 1159 0, /* tp_setattr */ 1160 0, /* tp_compare */ 1161 0, /* tp_repr */ 1162 0, /* tp_as_number */ 1163 0, /* tp_as_sequence */ 1164 0, /* tp_as_mapping */ 1165 0, /* tp_hash */ 1166 0, /* tp_call */ 1167 0, /* tp_str */ 1168 PyObject_GenericGetAttr, /* tp_getattro */ 1169 0, /* tp_setattro */ 1170 0, /* tp_as_buffer */ 1171 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1172 0, /* tp_doc */ 1173 0, /* tp_traverse */ 1174 0, /* tp_clear */ 1175 0, /* tp_richcompare */ 1176 0, /* tp_weaklistoffset */ 1177 PyObject_SelfIter, /* tp_iter */ 1178 (iternextfunc)formatteriter_next, /* tp_iternext */ 1179 formatteriter_methods, /* tp_methods */ 1180 0, 1181 }; 1182 1183 /* unicode_formatter_parser is used to implement 1184 string.Formatter.vformat. it parses a string and returns tuples 1185 describing the parsed elements. It's a wrapper around 1186 stringlib/string_format.h's MarkupIterator */ 1187 static PyObject * 1188 formatter_parser(STRINGLIB_OBJECT *self) 1189 { 1190 formatteriterobject *it; 1191 1192 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); 1193 if (it == NULL) 1194 return NULL; 1195 1196 /* take ownership, give the object to the iterator */ 1197 Py_INCREF(self); 1198 it->str = self; 1199 1200 /* initialize the contained MarkupIterator */ 1201 MarkupIterator_init(&it->it_markup, 1202 STRINGLIB_STR(self), 1203 STRINGLIB_LEN(self)); 1204 1205 return (PyObject *)it; 1206 } 1207 1208 1209 /************************************************************************/ 1210 /*********** fieldnameiterator ******************************************/ 1211 /************************************************************************/ 1212 1213 1214 /* This is used to implement string.Formatter.vparse(). It parses the 1215 field name into attribute and item values. It's a Python-callable 1216 wrapper around FieldNameIterator */ 1217 1218 typedef struct { 1219 PyObject_HEAD 1220 1221 STRINGLIB_OBJECT *str; 1222 1223 FieldNameIterator it_field; 1224 } fieldnameiterobject; 1225 1226 static void 1227 fieldnameiter_dealloc(fieldnameiterobject *it) 1228 { 1229 Py_XDECREF(it->str); 1230 PyObject_FREE(it); 1231 } 1232 1233 /* returns a tuple: 1234 (is_attr, value) 1235 is_attr is true if we used attribute syntax (e.g., '.foo') 1236 false if we used index syntax (e.g., '[foo]') 1237 value is an integer or string 1238 */ 1239 static PyObject * 1240 fieldnameiter_next(fieldnameiterobject *it) 1241 { 1242 int result; 1243 int is_attr; 1244 Py_ssize_t idx; 1245 SubString name; 1246 1247 result = FieldNameIterator_next(&it->it_field, &is_attr, 1248 &idx, &name); 1249 if (result == 0 || result == 1) 1250 /* if 0, error has already been set, if 1, iterator is empty */ 1251 return NULL; 1252 else { 1253 PyObject* result = NULL; 1254 PyObject* is_attr_obj = NULL; 1255 PyObject* obj = NULL; 1256 1257 is_attr_obj = PyBool_FromLong(is_attr); 1258 if (is_attr_obj == NULL) 1259 goto done; 1260 1261 /* either an integer or a string */ 1262 if (idx != -1) 1263 obj = PyLong_FromSsize_t(idx); 1264 else 1265 obj = SubString_new_object(&name); 1266 if (obj == NULL) 1267 goto done; 1268 1269 /* return a tuple of values */ 1270 result = PyTuple_Pack(2, is_attr_obj, obj); 1271 1272 done: 1273 Py_XDECREF(is_attr_obj); 1274 Py_XDECREF(obj); 1275 return result; 1276 } 1277 } 1278 1279 static PyMethodDef fieldnameiter_methods[] = { 1280 {NULL, NULL} /* sentinel */ 1281 }; 1282 1283 static PyTypeObject PyFieldNameIter_Type = { 1284 PyVarObject_HEAD_INIT(&PyType_Type, 0) 1285 "fieldnameiterator", /* tp_name */ 1286 sizeof(fieldnameiterobject), /* tp_basicsize */ 1287 0, /* tp_itemsize */ 1288 /* methods */ 1289 (destructor)fieldnameiter_dealloc, /* tp_dealloc */ 1290 0, /* tp_print */ 1291 0, /* tp_getattr */ 1292 0, /* tp_setattr */ 1293 0, /* tp_compare */ 1294 0, /* tp_repr */ 1295 0, /* tp_as_number */ 1296 0, /* tp_as_sequence */ 1297 0, /* tp_as_mapping */ 1298 0, /* tp_hash */ 1299 0, /* tp_call */ 1300 0, /* tp_str */ 1301 PyObject_GenericGetAttr, /* tp_getattro */ 1302 0, /* tp_setattro */ 1303 0, /* tp_as_buffer */ 1304 Py_TPFLAGS_DEFAULT, /* tp_flags */ 1305 0, /* tp_doc */ 1306 0, /* tp_traverse */ 1307 0, /* tp_clear */ 1308 0, /* tp_richcompare */ 1309 0, /* tp_weaklistoffset */ 1310 PyObject_SelfIter, /* tp_iter */ 1311 (iternextfunc)fieldnameiter_next, /* tp_iternext */ 1312 fieldnameiter_methods, /* tp_methods */ 1313 0}; 1314 1315 /* unicode_formatter_field_name_split is used to implement 1316 string.Formatter.vformat. it takes an PEP 3101 "field name", and 1317 returns a tuple of (first, rest): "first", the part before the 1318 first '.' or '['; and "rest", an iterator for the rest of the field 1319 name. it's a wrapper around stringlib/string_format.h's 1320 field_name_split. The iterator it returns is a 1321 FieldNameIterator */ 1322 static PyObject * 1323 formatter_field_name_split(STRINGLIB_OBJECT *self) 1324 { 1325 SubString first; 1326 Py_ssize_t first_idx; 1327 fieldnameiterobject *it; 1328 1329 PyObject *first_obj = NULL; 1330 PyObject *result = NULL; 1331 1332 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); 1333 if (it == NULL) 1334 return NULL; 1335 1336 /* take ownership, give the object to the iterator. this is 1337 just to keep the field_name alive */ 1338 Py_INCREF(self); 1339 it->str = self; 1340 1341 /* Pass in auto_number = NULL. We'll return an empty string for 1342 first_obj in that case. */ 1343 if (!field_name_split(STRINGLIB_STR(self), 1344 STRINGLIB_LEN(self), 1345 &first, &first_idx, &it->it_field, NULL)) 1346 goto done; 1347 1348 /* first becomes an integer, if possible; else a string */ 1349 if (first_idx != -1) 1350 first_obj = PyLong_FromSsize_t(first_idx); 1351 else 1352 /* convert "first" into a string object */ 1353 first_obj = SubString_new_object(&first); 1354 if (first_obj == NULL) 1355 goto done; 1356 1357 /* return a tuple of values */ 1358 result = PyTuple_Pack(2, first_obj, it); 1359 1360 done: 1361 Py_XDECREF(it); 1362 Py_XDECREF(first_obj); 1363 return result; 1364 } 1365