1 /* implements the unicode (as opposed to string) version of the 2 built-in formatters for string, int, float. that is, the versions 3 of int.__float__, etc., that take and return unicode objects */ 4 5 #include "Python.h" 6 #include <locale.h> 7 8 /* Raises an exception about an unknown presentation type for this 9 * type. */ 10 11 static void 12 unknown_presentation_type(Py_UCS4 presentation_type, 13 const char* type_name) 14 { 15 /* %c might be out-of-range, hence the two cases. */ 16 if (presentation_type > 32 && presentation_type < 128) 17 PyErr_Format(PyExc_ValueError, 18 "Unknown format code '%c' " 19 "for object of type '%.200s'", 20 (char)presentation_type, 21 type_name); 22 else 23 PyErr_Format(PyExc_ValueError, 24 "Unknown format code '\\x%x' " 25 "for object of type '%.200s'", 26 (unsigned int)presentation_type, 27 type_name); 28 } 29 30 static void 31 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type) 32 { 33 assert(specifier == ',' || specifier == '_'); 34 if (presentation_type > 32 && presentation_type < 128) 35 PyErr_Format(PyExc_ValueError, 36 "Cannot specify '%c' with '%c'.", 37 specifier, (char)presentation_type); 38 else 39 PyErr_Format(PyExc_ValueError, 40 "Cannot specify '%c' with '\\x%x'.", 41 specifier, (unsigned int)presentation_type); 42 } 43 44 static void 45 invalid_comma_and_underscore(void) 46 { 47 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'."); 48 } 49 50 /* 51 get_integer consumes 0 or more decimal digit characters from an 52 input string, updates *result with the corresponding positive 53 integer, and returns the number of digits consumed. 54 55 returns -1 on error. 56 */ 57 static int 58 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end, 59 Py_ssize_t *result) 60 { 61 Py_ssize_t accumulator, digitval, pos = *ppos; 62 int numdigits; 63 int kind = PyUnicode_KIND(str); 64 void *data = PyUnicode_DATA(str); 65 66 accumulator = numdigits = 0; 67 for (; pos < end; pos++, numdigits++) { 68 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos)); 69 if (digitval < 0) 70 break; 71 /* 72 Detect possible overflow before it happens: 73 74 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if 75 accumulator > (PY_SSIZE_T_MAX - digitval) / 10. 76 */ 77 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { 78 PyErr_Format(PyExc_ValueError, 79 "Too many decimal digits in format string"); 80 *ppos = pos; 81 return -1; 82 } 83 accumulator = accumulator * 10 + digitval; 84 } 85 *ppos = pos; 86 *result = accumulator; 87 return numdigits; 88 } 89 90 /************************************************************************/ 91 /*********** standard format specifier parsing **************************/ 92 /************************************************************************/ 93 94 /* returns true if this character is a specifier alignment token */ 95 Py_LOCAL_INLINE(int) 96 is_alignment_token(Py_UCS4 c) 97 { 98 switch (c) { 99 case '<': case '>': case '=': case '^': 100 return 1; 101 default: 102 return 0; 103 } 104 } 105 106 /* returns true if this character is a sign element */ 107 Py_LOCAL_INLINE(int) 108 is_sign_element(Py_UCS4 c) 109 { 110 switch (c) { 111 case ' ': case '+': case '-': 112 return 1; 113 default: 114 return 0; 115 } 116 } 117 118 /* Locale type codes. LT_NO_LOCALE must be zero. */ 119 enum LocaleType { 120 LT_NO_LOCALE = 0, 121 LT_DEFAULT_LOCALE = ',', 122 LT_UNDERSCORE_LOCALE = '_', 123 LT_UNDER_FOUR_LOCALE, 124 LT_CURRENT_LOCALE 125 }; 126 127 typedef struct { 128 Py_UCS4 fill_char; 129 Py_UCS4 align; 130 int alternate; 131 Py_UCS4 sign; 132 Py_ssize_t width; 133 enum LocaleType thousands_separators; 134 Py_ssize_t precision; 135 Py_UCS4 type; 136 } InternalFormatSpec; 137 138 #if 0 139 /* Occasionally useful for debugging. Should normally be commented out. */ 140 static void 141 DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format) 142 { 143 printf("internal format spec: fill_char %d\n", format->fill_char); 144 printf("internal format spec: align %d\n", format->align); 145 printf("internal format spec: alternate %d\n", format->alternate); 146 printf("internal format spec: sign %d\n", format->sign); 147 printf("internal format spec: width %zd\n", format->width); 148 printf("internal format spec: thousands_separators %d\n", 149 format->thousands_separators); 150 printf("internal format spec: precision %zd\n", format->precision); 151 printf("internal format spec: type %c\n", format->type); 152 printf("\n"); 153 } 154 #endif 155 156 157 /* 158 ptr points to the start of the format_spec, end points just past its end. 159 fills in format with the parsed information. 160 returns 1 on success, 0 on failure. 161 if failure, sets the exception 162 */ 163 static int 164 parse_internal_render_format_spec(PyObject *format_spec, 165 Py_ssize_t start, Py_ssize_t end, 166 InternalFormatSpec *format, 167 char default_type, 168 char default_align) 169 { 170 Py_ssize_t pos = start; 171 int kind = PyUnicode_KIND(format_spec); 172 void *data = PyUnicode_DATA(format_spec); 173 /* end-pos is used throughout this code to specify the length of 174 the input string */ 175 #define READ_spec(index) PyUnicode_READ(kind, data, index) 176 177 Py_ssize_t consumed; 178 int align_specified = 0; 179 int fill_char_specified = 0; 180 181 format->fill_char = ' '; 182 format->align = default_align; 183 format->alternate = 0; 184 format->sign = '\0'; 185 format->width = -1; 186 format->thousands_separators = LT_NO_LOCALE; 187 format->precision = -1; 188 format->type = default_type; 189 190 /* If the second char is an alignment token, 191 then parse the fill char */ 192 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) { 193 format->align = READ_spec(pos+1); 194 format->fill_char = READ_spec(pos); 195 fill_char_specified = 1; 196 align_specified = 1; 197 pos += 2; 198 } 199 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) { 200 format->align = READ_spec(pos); 201 align_specified = 1; 202 ++pos; 203 } 204 205 /* Parse the various sign options */ 206 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) { 207 format->sign = READ_spec(pos); 208 ++pos; 209 } 210 211 /* If the next character is #, we're in alternate mode. This only 212 applies to integers. */ 213 if (end-pos >= 1 && READ_spec(pos) == '#') { 214 format->alternate = 1; 215 ++pos; 216 } 217 218 /* The special case for 0-padding (backwards compat) */ 219 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') { 220 format->fill_char = '0'; 221 if (!align_specified) { 222 format->align = '='; 223 } 224 ++pos; 225 } 226 227 consumed = get_integer(format_spec, &pos, end, &format->width); 228 if (consumed == -1) 229 /* Overflow error. Exception already set. */ 230 return 0; 231 232 /* If consumed is 0, we didn't consume any characters for the 233 width. In that case, reset the width to -1, because 234 get_integer() will have set it to zero. -1 is how we record 235 that the width wasn't specified. */ 236 if (consumed == 0) 237 format->width = -1; 238 239 /* Comma signifies add thousands separators */ 240 if (end-pos && READ_spec(pos) == ',') { 241 format->thousands_separators = LT_DEFAULT_LOCALE; 242 ++pos; 243 } 244 /* Underscore signifies add thousands separators */ 245 if (end-pos && READ_spec(pos) == '_') { 246 if (format->thousands_separators != LT_NO_LOCALE) { 247 invalid_comma_and_underscore(); 248 return 0; 249 } 250 format->thousands_separators = LT_UNDERSCORE_LOCALE; 251 ++pos; 252 } 253 if (end-pos && READ_spec(pos) == ',') { 254 invalid_comma_and_underscore(); 255 return 0; 256 } 257 258 /* Parse field precision */ 259 if (end-pos && READ_spec(pos) == '.') { 260 ++pos; 261 262 consumed = get_integer(format_spec, &pos, end, &format->precision); 263 if (consumed == -1) 264 /* Overflow error. Exception already set. */ 265 return 0; 266 267 /* Not having a precision after a dot is an error. */ 268 if (consumed == 0) { 269 PyErr_Format(PyExc_ValueError, 270 "Format specifier missing precision"); 271 return 0; 272 } 273 274 } 275 276 /* Finally, parse the type field. */ 277 278 if (end-pos > 1) { 279 /* More than one char remain, invalid format specifier. */ 280 PyErr_Format(PyExc_ValueError, "Invalid format specifier"); 281 return 0; 282 } 283 284 if (end-pos == 1) { 285 format->type = READ_spec(pos); 286 ++pos; 287 } 288 289 /* Do as much validating as we can, just by looking at the format 290 specifier. Do not take into account what type of formatting 291 we're doing (int, float, string). */ 292 293 if (format->thousands_separators) { 294 switch (format->type) { 295 case 'd': 296 case 'e': 297 case 'f': 298 case 'g': 299 case 'E': 300 case 'G': 301 case '%': 302 case 'F': 303 case '\0': 304 /* These are allowed. See PEP 378.*/ 305 break; 306 case 'b': 307 case 'o': 308 case 'x': 309 case 'X': 310 /* Underscores are allowed in bin/oct/hex. See PEP 515. */ 311 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) { 312 /* Every four digits, not every three, in bin/oct/hex. */ 313 format->thousands_separators = LT_UNDER_FOUR_LOCALE; 314 break; 315 } 316 /* fall through */ 317 default: 318 invalid_thousands_separator_type(format->thousands_separators, format->type); 319 return 0; 320 } 321 } 322 323 assert (format->align <= 127); 324 assert (format->sign <= 127); 325 return 1; 326 } 327 328 /* Calculate the padding needed. */ 329 static void 330 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align, 331 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding, 332 Py_ssize_t *n_total) 333 { 334 if (width >= 0) { 335 if (nchars > width) 336 *n_total = nchars; 337 else 338 *n_total = width; 339 } 340 else { 341 /* not specified, use all of the chars and no more */ 342 *n_total = nchars; 343 } 344 345 /* Figure out how much leading space we need, based on the 346 aligning */ 347 if (align == '>') 348 *n_lpadding = *n_total - nchars; 349 else if (align == '^') 350 *n_lpadding = (*n_total - nchars) / 2; 351 else if (align == '<' || align == '=') 352 *n_lpadding = 0; 353 else { 354 /* We should never have an unspecified alignment. */ 355 Py_UNREACHABLE(); 356 } 357 358 *n_rpadding = *n_total - nchars - *n_lpadding; 359 } 360 361 /* Do the padding, and return a pointer to where the caller-supplied 362 content goes. */ 363 static int 364 fill_padding(_PyUnicodeWriter *writer, 365 Py_ssize_t nchars, 366 Py_UCS4 fill_char, Py_ssize_t n_lpadding, 367 Py_ssize_t n_rpadding) 368 { 369 Py_ssize_t pos; 370 371 /* Pad on left. */ 372 if (n_lpadding) { 373 pos = writer->pos; 374 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char); 375 } 376 377 /* Pad on right. */ 378 if (n_rpadding) { 379 pos = writer->pos + nchars + n_lpadding; 380 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char); 381 } 382 383 /* Pointer to the user content. */ 384 writer->pos += n_lpadding; 385 return 0; 386 } 387 388 /************************************************************************/ 389 /*********** common routines for numeric formatting *********************/ 390 /************************************************************************/ 391 392 /* Locale info needed for formatting integers and the part of floats 393 before and including the decimal. Note that locales only support 394 8-bit chars, not unicode. */ 395 typedef struct { 396 PyObject *decimal_point; 397 PyObject *thousands_sep; 398 const char *grouping; 399 char *grouping_buffer; 400 } LocaleInfo; 401 402 #define STATIC_LOCALE_INFO_INIT {0, 0, 0, 0} 403 404 /* describes the layout for an integer, see the comment in 405 calc_number_widths() for details */ 406 typedef struct { 407 Py_ssize_t n_lpadding; 408 Py_ssize_t n_prefix; 409 Py_ssize_t n_spadding; 410 Py_ssize_t n_rpadding; 411 char sign; 412 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */ 413 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including 414 any grouping chars. */ 415 Py_ssize_t n_decimal; /* 0 if only an integer */ 416 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part, 417 excluding the decimal itself, if 418 present. */ 419 420 /* These 2 are not the widths of fields, but are needed by 421 STRINGLIB_GROUPING. */ 422 Py_ssize_t n_digits; /* The number of digits before a decimal 423 or exponent. */ 424 Py_ssize_t n_min_width; /* The min_width we used when we computed 425 the n_grouped_digits width. */ 426 } NumberFieldWidths; 427 428 429 /* Given a number of the form: 430 digits[remainder] 431 where ptr points to the start and end points to the end, find where 432 the integer part ends. This could be a decimal, an exponent, both, 433 or neither. 434 If a decimal point is present, set *has_decimal and increment 435 remainder beyond it. 436 Results are undefined (but shouldn't crash) for improperly 437 formatted strings. 438 */ 439 static void 440 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, 441 Py_ssize_t *n_remainder, int *has_decimal) 442 { 443 Py_ssize_t remainder; 444 int kind = PyUnicode_KIND(s); 445 void *data = PyUnicode_DATA(s); 446 447 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) 448 ++pos; 449 remainder = pos; 450 451 /* Does remainder start with a decimal point? */ 452 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.'; 453 454 /* Skip the decimal point. */ 455 if (*has_decimal) 456 remainder++; 457 458 *n_remainder = end - remainder; 459 } 460 461 /* not all fields of format are used. for example, precision is 462 unused. should this take discrete params in order to be more clear 463 about what it does? or is passing a single format parameter easier 464 and more efficient enough to justify a little obfuscation? 465 Return -1 on error. */ 466 static Py_ssize_t 467 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, 468 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, 469 Py_ssize_t n_end, Py_ssize_t n_remainder, 470 int has_decimal, const LocaleInfo *locale, 471 const InternalFormatSpec *format, Py_UCS4 *maxchar) 472 { 473 Py_ssize_t n_non_digit_non_padding; 474 Py_ssize_t n_padding; 475 476 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0); 477 spec->n_lpadding = 0; 478 spec->n_prefix = n_prefix; 479 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0; 480 spec->n_remainder = n_remainder; 481 spec->n_spadding = 0; 482 spec->n_rpadding = 0; 483 spec->sign = '\0'; 484 spec->n_sign = 0; 485 486 /* the output will look like: 487 | | 488 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> | 489 | | 490 491 sign is computed from format->sign and the actual 492 sign of the number 493 494 prefix is given (it's for the '0x' prefix) 495 496 digits is already known 497 498 the total width is either given, or computed from the 499 actual digits 500 501 only one of lpadding, spadding, and rpadding can be non-zero, 502 and it's calculated from the width and other fields 503 */ 504 505 /* compute the various parts we're going to write */ 506 switch (format->sign) { 507 case '+': 508 /* always put a + or - */ 509 spec->n_sign = 1; 510 spec->sign = (sign_char == '-' ? '-' : '+'); 511 break; 512 case ' ': 513 spec->n_sign = 1; 514 spec->sign = (sign_char == '-' ? '-' : ' '); 515 break; 516 default: 517 /* Not specified, or the default (-) */ 518 if (sign_char == '-') { 519 spec->n_sign = 1; 520 spec->sign = '-'; 521 } 522 } 523 524 /* The number of chars used for non-digits and non-padding. */ 525 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal + 526 spec->n_remainder; 527 528 /* min_width can go negative, that's okay. format->width == -1 means 529 we don't care. */ 530 if (format->fill_char == '0' && format->align == '=') 531 spec->n_min_width = format->width - n_non_digit_non_padding; 532 else 533 spec->n_min_width = 0; 534 535 if (spec->n_digits == 0) 536 /* This case only occurs when using 'c' formatting, we need 537 to special case it because the grouping code always wants 538 to have at least one character. */ 539 spec->n_grouped_digits = 0; 540 else { 541 Py_UCS4 grouping_maxchar; 542 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping( 543 NULL, 0, 544 NULL, 0, spec->n_digits, 545 spec->n_min_width, 546 locale->grouping, locale->thousands_sep, &grouping_maxchar); 547 if (spec->n_grouped_digits == -1) { 548 return -1; 549 } 550 *maxchar = Py_MAX(*maxchar, grouping_maxchar); 551 } 552 553 /* Given the desired width and the total of digit and non-digit 554 space we consume, see if we need any padding. format->width can 555 be negative (meaning no padding), but this code still works in 556 that case. */ 557 n_padding = format->width - 558 (n_non_digit_non_padding + spec->n_grouped_digits); 559 if (n_padding > 0) { 560 /* Some padding is needed. Determine if it's left, space, or right. */ 561 switch (format->align) { 562 case '<': 563 spec->n_rpadding = n_padding; 564 break; 565 case '^': 566 spec->n_lpadding = n_padding / 2; 567 spec->n_rpadding = n_padding - spec->n_lpadding; 568 break; 569 case '=': 570 spec->n_spadding = n_padding; 571 break; 572 case '>': 573 spec->n_lpadding = n_padding; 574 break; 575 default: 576 /* Shouldn't get here, but treat it as '>' */ 577 Py_UNREACHABLE(); 578 } 579 } 580 581 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding) 582 *maxchar = Py_MAX(*maxchar, format->fill_char); 583 584 if (spec->n_decimal) 585 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point)); 586 587 return spec->n_lpadding + spec->n_sign + spec->n_prefix + 588 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + 589 spec->n_remainder + spec->n_rpadding; 590 } 591 592 /* Fill in the digit parts of a numbers's string representation, 593 as determined in calc_number_widths(). 594 Return -1 on error, or 0 on success. */ 595 static int 596 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, 597 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, 598 PyObject *prefix, Py_ssize_t p_start, 599 Py_UCS4 fill_char, 600 LocaleInfo *locale, int toupper) 601 { 602 /* Used to keep track of digits, decimal, and remainder. */ 603 Py_ssize_t d_pos = d_start; 604 const unsigned int kind = writer->kind; 605 const void *data = writer->data; 606 Py_ssize_t r; 607 608 if (spec->n_lpadding) { 609 _PyUnicode_FastFill(writer->buffer, 610 writer->pos, spec->n_lpadding, fill_char); 611 writer->pos += spec->n_lpadding; 612 } 613 if (spec->n_sign == 1) { 614 PyUnicode_WRITE(kind, data, writer->pos, spec->sign); 615 writer->pos++; 616 } 617 if (spec->n_prefix) { 618 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, 619 prefix, p_start, 620 spec->n_prefix); 621 if (toupper) { 622 Py_ssize_t t; 623 for (t = 0; t < spec->n_prefix; t++) { 624 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); 625 c = Py_TOUPPER(c); 626 assert (c <= 127); 627 PyUnicode_WRITE(kind, data, writer->pos + t, c); 628 } 629 } 630 writer->pos += spec->n_prefix; 631 } 632 if (spec->n_spadding) { 633 _PyUnicode_FastFill(writer->buffer, 634 writer->pos, spec->n_spadding, fill_char); 635 writer->pos += spec->n_spadding; 636 } 637 638 /* Only for type 'c' special case, it has no digits. */ 639 if (spec->n_digits != 0) { 640 /* Fill the digits with InsertThousandsGrouping. */ 641 r = _PyUnicode_InsertThousandsGrouping( 642 writer, spec->n_grouped_digits, 643 digits, d_pos, spec->n_digits, 644 spec->n_min_width, 645 locale->grouping, locale->thousands_sep, NULL); 646 if (r == -1) 647 return -1; 648 assert(r == spec->n_grouped_digits); 649 d_pos += spec->n_digits; 650 } 651 if (toupper) { 652 Py_ssize_t t; 653 for (t = 0; t < spec->n_grouped_digits; t++) { 654 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); 655 c = Py_TOUPPER(c); 656 if (c > 127) { 657 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); 658 return -1; 659 } 660 PyUnicode_WRITE(kind, data, writer->pos + t, c); 661 } 662 } 663 writer->pos += spec->n_grouped_digits; 664 665 if (spec->n_decimal) { 666 _PyUnicode_FastCopyCharacters( 667 writer->buffer, writer->pos, 668 locale->decimal_point, 0, spec->n_decimal); 669 writer->pos += spec->n_decimal; 670 d_pos += 1; 671 } 672 673 if (spec->n_remainder) { 674 _PyUnicode_FastCopyCharacters( 675 writer->buffer, writer->pos, 676 digits, d_pos, spec->n_remainder); 677 writer->pos += spec->n_remainder; 678 /* d_pos += spec->n_remainder; */ 679 } 680 681 if (spec->n_rpadding) { 682 _PyUnicode_FastFill(writer->buffer, 683 writer->pos, spec->n_rpadding, 684 fill_char); 685 writer->pos += spec->n_rpadding; 686 } 687 return 0; 688 } 689 690 static const char no_grouping[1] = {CHAR_MAX}; 691 692 /* Find the decimal point character(s?), thousands_separator(s?), and 693 grouping description, either for the current locale if type is 694 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or 695 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */ 696 static int 697 get_locale_info(enum LocaleType type, LocaleInfo *locale_info) 698 { 699 switch (type) { 700 case LT_CURRENT_LOCALE: { 701 const char *grouping; 702 if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point, 703 &locale_info->thousands_sep, 704 &grouping) < 0) { 705 return -1; 706 } 707 708 /* localeconv() grouping can become a dangling pointer or point 709 to a different string if another thread calls localeconv() during 710 the string formatting. Copy the string to avoid this risk. */ 711 locale_info->grouping_buffer = _PyMem_Strdup(grouping); 712 if (locale_info->grouping_buffer == NULL) { 713 PyErr_NoMemory(); 714 return -1; 715 } 716 locale_info->grouping = locale_info->grouping_buffer; 717 break; 718 } 719 case LT_DEFAULT_LOCALE: 720 case LT_UNDERSCORE_LOCALE: 721 case LT_UNDER_FOUR_LOCALE: 722 locale_info->decimal_point = PyUnicode_FromOrdinal('.'); 723 locale_info->thousands_sep = PyUnicode_FromOrdinal( 724 type == LT_DEFAULT_LOCALE ? ',' : '_'); 725 if (!locale_info->decimal_point || !locale_info->thousands_sep) 726 return -1; 727 if (type != LT_UNDER_FOUR_LOCALE) 728 locale_info->grouping = "\3"; /* Group every 3 characters. The 729 (implicit) trailing 0 means repeat 730 infinitely. */ 731 else 732 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */ 733 break; 734 case LT_NO_LOCALE: 735 locale_info->decimal_point = PyUnicode_FromOrdinal('.'); 736 locale_info->thousands_sep = PyUnicode_New(0, 0); 737 if (!locale_info->decimal_point || !locale_info->thousands_sep) 738 return -1; 739 locale_info->grouping = no_grouping; 740 break; 741 } 742 return 0; 743 } 744 745 static void 746 free_locale_info(LocaleInfo *locale_info) 747 { 748 Py_XDECREF(locale_info->decimal_point); 749 Py_XDECREF(locale_info->thousands_sep); 750 PyMem_Free(locale_info->grouping_buffer); 751 } 752 753 /************************************************************************/ 754 /*********** string formatting ******************************************/ 755 /************************************************************************/ 756 757 static int 758 format_string_internal(PyObject *value, const InternalFormatSpec *format, 759 _PyUnicodeWriter *writer) 760 { 761 Py_ssize_t lpad; 762 Py_ssize_t rpad; 763 Py_ssize_t total; 764 Py_ssize_t len; 765 int result = -1; 766 Py_UCS4 maxchar; 767 768 assert(PyUnicode_IS_READY(value)); 769 len = PyUnicode_GET_LENGTH(value); 770 771 /* sign is not allowed on strings */ 772 if (format->sign != '\0') { 773 PyErr_SetString(PyExc_ValueError, 774 "Sign not allowed in string format specifier"); 775 goto done; 776 } 777 778 /* alternate is not allowed on strings */ 779 if (format->alternate) { 780 PyErr_SetString(PyExc_ValueError, 781 "Alternate form (#) not allowed in string format " 782 "specifier"); 783 goto done; 784 } 785 786 /* '=' alignment not allowed on strings */ 787 if (format->align == '=') { 788 PyErr_SetString(PyExc_ValueError, 789 "'=' alignment not allowed " 790 "in string format specifier"); 791 goto done; 792 } 793 794 if ((format->width == -1 || format->width <= len) 795 && (format->precision == -1 || format->precision >= len)) { 796 /* Fast path */ 797 return _PyUnicodeWriter_WriteStr(writer, value); 798 } 799 800 /* if precision is specified, output no more that format.precision 801 characters */ 802 if (format->precision >= 0 && len >= format->precision) { 803 len = format->precision; 804 } 805 806 calc_padding(len, format->width, format->align, &lpad, &rpad, &total); 807 808 maxchar = writer->maxchar; 809 if (lpad != 0 || rpad != 0) 810 maxchar = Py_MAX(maxchar, format->fill_char); 811 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) { 812 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len); 813 maxchar = Py_MAX(maxchar, valmaxchar); 814 } 815 816 /* allocate the resulting string */ 817 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) 818 goto done; 819 820 /* Write into that space. First the padding. */ 821 result = fill_padding(writer, len, format->fill_char, lpad, rpad); 822 if (result == -1) 823 goto done; 824 825 /* Then the source string. */ 826 if (len) { 827 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, 828 value, 0, len); 829 } 830 writer->pos += (len + rpad); 831 result = 0; 832 833 done: 834 return result; 835 } 836 837 838 /************************************************************************/ 839 /*********** long formatting ********************************************/ 840 /************************************************************************/ 841 842 static int 843 format_long_internal(PyObject *value, const InternalFormatSpec *format, 844 _PyUnicodeWriter *writer) 845 { 846 int result = -1; 847 Py_UCS4 maxchar = 127; 848 PyObject *tmp = NULL; 849 Py_ssize_t inumeric_chars; 850 Py_UCS4 sign_char = '\0'; 851 Py_ssize_t n_digits; /* count of digits need from the computed 852 string */ 853 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which 854 produces non-digits */ 855 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */ 856 Py_ssize_t n_total; 857 Py_ssize_t prefix = 0; 858 NumberFieldWidths spec; 859 long x; 860 861 /* Locale settings, either from the actual locale or 862 from a hard-code pseudo-locale */ 863 LocaleInfo locale = STATIC_LOCALE_INFO_INIT; 864 865 /* no precision allowed on integers */ 866 if (format->precision != -1) { 867 PyErr_SetString(PyExc_ValueError, 868 "Precision not allowed in integer format specifier"); 869 goto done; 870 } 871 872 /* special case for character formatting */ 873 if (format->type == 'c') { 874 /* error to specify a sign */ 875 if (format->sign != '\0') { 876 PyErr_SetString(PyExc_ValueError, 877 "Sign not allowed with integer" 878 " format specifier 'c'"); 879 goto done; 880 } 881 /* error to request alternate format */ 882 if (format->alternate) { 883 PyErr_SetString(PyExc_ValueError, 884 "Alternate form (#) not allowed with integer" 885 " format specifier 'c'"); 886 goto done; 887 } 888 889 /* taken from unicodeobject.c formatchar() */ 890 /* Integer input truncated to a character */ 891 x = PyLong_AsLong(value); 892 if (x == -1 && PyErr_Occurred()) 893 goto done; 894 if (x < 0 || x > 0x10ffff) { 895 PyErr_SetString(PyExc_OverflowError, 896 "%c arg not in range(0x110000)"); 897 goto done; 898 } 899 tmp = PyUnicode_FromOrdinal(x); 900 inumeric_chars = 0; 901 n_digits = 1; 902 maxchar = Py_MAX(maxchar, (Py_UCS4)x); 903 904 /* As a sort-of hack, we tell calc_number_widths that we only 905 have "remainder" characters. calc_number_widths thinks 906 these are characters that don't get formatted, only copied 907 into the output string. We do this for 'c' formatting, 908 because the characters are likely to be non-digits. */ 909 n_remainder = 1; 910 } 911 else { 912 int base; 913 int leading_chars_to_skip = 0; /* Number of characters added by 914 PyNumber_ToBase that we want to 915 skip over. */ 916 917 /* Compute the base and how many characters will be added by 918 PyNumber_ToBase */ 919 switch (format->type) { 920 case 'b': 921 base = 2; 922 leading_chars_to_skip = 2; /* 0b */ 923 break; 924 case 'o': 925 base = 8; 926 leading_chars_to_skip = 2; /* 0o */ 927 break; 928 case 'x': 929 case 'X': 930 base = 16; 931 leading_chars_to_skip = 2; /* 0x */ 932 break; 933 default: /* shouldn't be needed, but stops a compiler warning */ 934 case 'd': 935 case 'n': 936 base = 10; 937 break; 938 } 939 940 if (format->sign != '+' && format->sign != ' ' 941 && format->width == -1 942 && format->type != 'X' && format->type != 'n' 943 && !format->thousands_separators 944 && PyLong_CheckExact(value)) 945 { 946 /* Fast path */ 947 return _PyLong_FormatWriter(writer, value, base, format->alternate); 948 } 949 950 /* The number of prefix chars is the same as the leading 951 chars to skip */ 952 if (format->alternate) 953 n_prefix = leading_chars_to_skip; 954 955 /* Do the hard part, converting to a string in a given base */ 956 tmp = _PyLong_Format(value, base); 957 if (tmp == NULL || PyUnicode_READY(tmp) == -1) 958 goto done; 959 960 inumeric_chars = 0; 961 n_digits = PyUnicode_GET_LENGTH(tmp); 962 963 prefix = inumeric_chars; 964 965 /* Is a sign character present in the output? If so, remember it 966 and skip it */ 967 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') { 968 sign_char = '-'; 969 ++prefix; 970 ++leading_chars_to_skip; 971 } 972 973 /* Skip over the leading chars (0x, 0b, etc.) */ 974 n_digits -= leading_chars_to_skip; 975 inumeric_chars += leading_chars_to_skip; 976 } 977 978 /* Determine the grouping, separator, and decimal point, if any. */ 979 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : 980 format->thousands_separators, 981 &locale) == -1) 982 goto done; 983 984 /* Calculate how much memory we'll need. */ 985 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars, 986 inumeric_chars + n_digits, n_remainder, 0, 987 &locale, format, &maxchar); 988 if (n_total == -1) { 989 goto done; 990 } 991 992 /* Allocate the memory. */ 993 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) 994 goto done; 995 996 /* Populate the memory. */ 997 result = fill_number(writer, &spec, 998 tmp, inumeric_chars, inumeric_chars + n_digits, 999 tmp, prefix, format->fill_char, 1000 &locale, format->type == 'X'); 1001 1002 done: 1003 Py_XDECREF(tmp); 1004 free_locale_info(&locale); 1005 return result; 1006 } 1007 1008 /************************************************************************/ 1009 /*********** float formatting *******************************************/ 1010 /************************************************************************/ 1011 1012 /* much of this is taken from unicodeobject.c */ 1013 static int 1014 format_float_internal(PyObject *value, 1015 const InternalFormatSpec *format, 1016 _PyUnicodeWriter *writer) 1017 { 1018 char *buf = NULL; /* buffer returned from PyOS_double_to_string */ 1019 Py_ssize_t n_digits; 1020 Py_ssize_t n_remainder; 1021 Py_ssize_t n_total; 1022 int has_decimal; 1023 double val; 1024 int precision, default_precision = 6; 1025 Py_UCS4 type = format->type; 1026 int add_pct = 0; 1027 Py_ssize_t index; 1028 NumberFieldWidths spec; 1029 int flags = 0; 1030 int result = -1; 1031 Py_UCS4 maxchar = 127; 1032 Py_UCS4 sign_char = '\0'; 1033 int float_type; /* Used to see if we have a nan, inf, or regular float. */ 1034 PyObject *unicode_tmp = NULL; 1035 1036 /* Locale settings, either from the actual locale or 1037 from a hard-code pseudo-locale */ 1038 LocaleInfo locale = STATIC_LOCALE_INFO_INIT; 1039 1040 if (format->precision > INT_MAX) { 1041 PyErr_SetString(PyExc_ValueError, "precision too big"); 1042 goto done; 1043 } 1044 precision = (int)format->precision; 1045 1046 if (format->alternate) 1047 flags |= Py_DTSF_ALT; 1048 1049 if (type == '\0') { 1050 /* Omitted type specifier. Behaves in the same way as repr(x) 1051 and str(x) if no precision is given, else like 'g', but with 1052 at least one digit after the decimal point. */ 1053 flags |= Py_DTSF_ADD_DOT_0; 1054 type = 'r'; 1055 default_precision = 0; 1056 } 1057 1058 if (type == 'n') 1059 /* 'n' is the same as 'g', except for the locale used to 1060 format the result. We take care of that later. */ 1061 type = 'g'; 1062 1063 val = PyFloat_AsDouble(value); 1064 if (val == -1.0 && PyErr_Occurred()) 1065 goto done; 1066 1067 if (type == '%') { 1068 type = 'f'; 1069 val *= 100; 1070 add_pct = 1; 1071 } 1072 1073 if (precision < 0) 1074 precision = default_precision; 1075 else if (type == 'r') 1076 type = 'g'; 1077 1078 /* Cast "type", because if we're in unicode we need to pass an 1079 8-bit char. This is safe, because we've restricted what "type" 1080 can be. */ 1081 buf = PyOS_double_to_string(val, (char)type, precision, flags, 1082 &float_type); 1083 if (buf == NULL) 1084 goto done; 1085 n_digits = strlen(buf); 1086 1087 if (add_pct) { 1088 /* We know that buf has a trailing zero (since we just called 1089 strlen() on it), and we don't use that fact any more. So we 1090 can just write over the trailing zero. */ 1091 buf[n_digits] = '%'; 1092 n_digits += 1; 1093 } 1094 1095 if (format->sign != '+' && format->sign != ' ' 1096 && format->width == -1 1097 && format->type != 'n' 1098 && !format->thousands_separators) 1099 { 1100 /* Fast path */ 1101 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits); 1102 PyMem_Free(buf); 1103 return result; 1104 } 1105 1106 /* Since there is no unicode version of PyOS_double_to_string, 1107 just use the 8 bit version and then convert to unicode. */ 1108 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits); 1109 PyMem_Free(buf); 1110 if (unicode_tmp == NULL) 1111 goto done; 1112 1113 /* Is a sign character present in the output? If so, remember it 1114 and skip it */ 1115 index = 0; 1116 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') { 1117 sign_char = '-'; 1118 ++index; 1119 --n_digits; 1120 } 1121 1122 /* Determine if we have any "remainder" (after the digits, might include 1123 decimal or exponent or both (or neither)) */ 1124 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal); 1125 1126 /* Determine the grouping, separator, and decimal point, if any. */ 1127 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : 1128 format->thousands_separators, 1129 &locale) == -1) 1130 goto done; 1131 1132 /* Calculate how much memory we'll need. */ 1133 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, 1134 index + n_digits, n_remainder, has_decimal, 1135 &locale, format, &maxchar); 1136 if (n_total == -1) { 1137 goto done; 1138 } 1139 1140 /* Allocate the memory. */ 1141 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) 1142 goto done; 1143 1144 /* Populate the memory. */ 1145 result = fill_number(writer, &spec, 1146 unicode_tmp, index, index + n_digits, 1147 NULL, 0, format->fill_char, 1148 &locale, 0); 1149 1150 done: 1151 Py_XDECREF(unicode_tmp); 1152 free_locale_info(&locale); 1153 return result; 1154 } 1155 1156 /************************************************************************/ 1157 /*********** complex formatting *****************************************/ 1158 /************************************************************************/ 1159 1160 static int 1161 format_complex_internal(PyObject *value, 1162 const InternalFormatSpec *format, 1163 _PyUnicodeWriter *writer) 1164 { 1165 double re; 1166 double im; 1167 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */ 1168 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */ 1169 1170 InternalFormatSpec tmp_format = *format; 1171 Py_ssize_t n_re_digits; 1172 Py_ssize_t n_im_digits; 1173 Py_ssize_t n_re_remainder; 1174 Py_ssize_t n_im_remainder; 1175 Py_ssize_t n_re_total; 1176 Py_ssize_t n_im_total; 1177 int re_has_decimal; 1178 int im_has_decimal; 1179 int precision, default_precision = 6; 1180 Py_UCS4 type = format->type; 1181 Py_ssize_t i_re; 1182 Py_ssize_t i_im; 1183 NumberFieldWidths re_spec; 1184 NumberFieldWidths im_spec; 1185 int flags = 0; 1186 int result = -1; 1187 Py_UCS4 maxchar = 127; 1188 enum PyUnicode_Kind rkind; 1189 void *rdata; 1190 Py_UCS4 re_sign_char = '\0'; 1191 Py_UCS4 im_sign_char = '\0'; 1192 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */ 1193 int im_float_type; 1194 int add_parens = 0; 1195 int skip_re = 0; 1196 Py_ssize_t lpad; 1197 Py_ssize_t rpad; 1198 Py_ssize_t total; 1199 PyObject *re_unicode_tmp = NULL; 1200 PyObject *im_unicode_tmp = NULL; 1201 1202 /* Locale settings, either from the actual locale or 1203 from a hard-code pseudo-locale */ 1204 LocaleInfo locale = STATIC_LOCALE_INFO_INIT; 1205 1206 if (format->precision > INT_MAX) { 1207 PyErr_SetString(PyExc_ValueError, "precision too big"); 1208 goto done; 1209 } 1210 precision = (int)format->precision; 1211 1212 /* Zero padding is not allowed. */ 1213 if (format->fill_char == '0') { 1214 PyErr_SetString(PyExc_ValueError, 1215 "Zero padding is not allowed in complex format " 1216 "specifier"); 1217 goto done; 1218 } 1219 1220 /* Neither is '=' alignment . */ 1221 if (format->align == '=') { 1222 PyErr_SetString(PyExc_ValueError, 1223 "'=' alignment flag is not allowed in complex format " 1224 "specifier"); 1225 goto done; 1226 } 1227 1228 re = PyComplex_RealAsDouble(value); 1229 if (re == -1.0 && PyErr_Occurred()) 1230 goto done; 1231 im = PyComplex_ImagAsDouble(value); 1232 if (im == -1.0 && PyErr_Occurred()) 1233 goto done; 1234 1235 if (format->alternate) 1236 flags |= Py_DTSF_ALT; 1237 1238 if (type == '\0') { 1239 /* Omitted type specifier. Should be like str(self). */ 1240 type = 'r'; 1241 default_precision = 0; 1242 if (re == 0.0 && copysign(1.0, re) == 1.0) 1243 skip_re = 1; 1244 else 1245 add_parens = 1; 1246 } 1247 1248 if (type == 'n') 1249 /* 'n' is the same as 'g', except for the locale used to 1250 format the result. We take care of that later. */ 1251 type = 'g'; 1252 1253 if (precision < 0) 1254 precision = default_precision; 1255 else if (type == 'r') 1256 type = 'g'; 1257 1258 /* Cast "type", because if we're in unicode we need to pass an 1259 8-bit char. This is safe, because we've restricted what "type" 1260 can be. */ 1261 re_buf = PyOS_double_to_string(re, (char)type, precision, flags, 1262 &re_float_type); 1263 if (re_buf == NULL) 1264 goto done; 1265 im_buf = PyOS_double_to_string(im, (char)type, precision, flags, 1266 &im_float_type); 1267 if (im_buf == NULL) 1268 goto done; 1269 1270 n_re_digits = strlen(re_buf); 1271 n_im_digits = strlen(im_buf); 1272 1273 /* Since there is no unicode version of PyOS_double_to_string, 1274 just use the 8 bit version and then convert to unicode. */ 1275 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits); 1276 if (re_unicode_tmp == NULL) 1277 goto done; 1278 i_re = 0; 1279 1280 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits); 1281 if (im_unicode_tmp == NULL) 1282 goto done; 1283 i_im = 0; 1284 1285 /* Is a sign character present in the output? If so, remember it 1286 and skip it */ 1287 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') { 1288 re_sign_char = '-'; 1289 ++i_re; 1290 --n_re_digits; 1291 } 1292 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') { 1293 im_sign_char = '-'; 1294 ++i_im; 1295 --n_im_digits; 1296 } 1297 1298 /* Determine if we have any "remainder" (after the digits, might include 1299 decimal or exponent or both (or neither)) */ 1300 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits, 1301 &n_re_remainder, &re_has_decimal); 1302 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits, 1303 &n_im_remainder, &im_has_decimal); 1304 1305 /* Determine the grouping, separator, and decimal point, if any. */ 1306 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : 1307 format->thousands_separators, 1308 &locale) == -1) 1309 goto done; 1310 1311 /* Turn off any padding. We'll do it later after we've composed 1312 the numbers without padding. */ 1313 tmp_format.fill_char = '\0'; 1314 tmp_format.align = '<'; 1315 tmp_format.width = -1; 1316 1317 /* Calculate how much memory we'll need. */ 1318 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp, 1319 i_re, i_re + n_re_digits, n_re_remainder, 1320 re_has_decimal, &locale, &tmp_format, 1321 &maxchar); 1322 if (n_re_total == -1) { 1323 goto done; 1324 } 1325 1326 /* Same formatting, but always include a sign, unless the real part is 1327 * going to be omitted, in which case we use whatever sign convention was 1328 * requested by the original format. */ 1329 if (!skip_re) 1330 tmp_format.sign = '+'; 1331 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp, 1332 i_im, i_im + n_im_digits, n_im_remainder, 1333 im_has_decimal, &locale, &tmp_format, 1334 &maxchar); 1335 if (n_im_total == -1) { 1336 goto done; 1337 } 1338 1339 if (skip_re) 1340 n_re_total = 0; 1341 1342 /* Add 1 for the 'j', and optionally 2 for parens. */ 1343 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2, 1344 format->width, format->align, &lpad, &rpad, &total); 1345 1346 if (lpad || rpad) 1347 maxchar = Py_MAX(maxchar, format->fill_char); 1348 1349 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) 1350 goto done; 1351 rkind = writer->kind; 1352 rdata = writer->data; 1353 1354 /* Populate the memory. First, the padding. */ 1355 result = fill_padding(writer, 1356 n_re_total + n_im_total + 1 + add_parens * 2, 1357 format->fill_char, lpad, rpad); 1358 if (result == -1) 1359 goto done; 1360 1361 if (add_parens) { 1362 PyUnicode_WRITE(rkind, rdata, writer->pos, '('); 1363 writer->pos++; 1364 } 1365 1366 if (!skip_re) { 1367 result = fill_number(writer, &re_spec, 1368 re_unicode_tmp, i_re, i_re + n_re_digits, 1369 NULL, 0, 1370 0, 1371 &locale, 0); 1372 if (result == -1) 1373 goto done; 1374 } 1375 result = fill_number(writer, &im_spec, 1376 im_unicode_tmp, i_im, i_im + n_im_digits, 1377 NULL, 0, 1378 0, 1379 &locale, 0); 1380 if (result == -1) 1381 goto done; 1382 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j'); 1383 writer->pos++; 1384 1385 if (add_parens) { 1386 PyUnicode_WRITE(rkind, rdata, writer->pos, ')'); 1387 writer->pos++; 1388 } 1389 1390 writer->pos += rpad; 1391 1392 done: 1393 PyMem_Free(re_buf); 1394 PyMem_Free(im_buf); 1395 Py_XDECREF(re_unicode_tmp); 1396 Py_XDECREF(im_unicode_tmp); 1397 free_locale_info(&locale); 1398 return result; 1399 } 1400 1401 /************************************************************************/ 1402 /*********** built in formatters ****************************************/ 1403 /************************************************************************/ 1404 static int 1405 format_obj(PyObject *obj, _PyUnicodeWriter *writer) 1406 { 1407 PyObject *str; 1408 int err; 1409 1410 str = PyObject_Str(obj); 1411 if (str == NULL) 1412 return -1; 1413 err = _PyUnicodeWriter_WriteStr(writer, str); 1414 Py_DECREF(str); 1415 return err; 1416 } 1417 1418 int 1419 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer, 1420 PyObject *obj, 1421 PyObject *format_spec, 1422 Py_ssize_t start, Py_ssize_t end) 1423 { 1424 InternalFormatSpec format; 1425 1426 assert(PyUnicode_Check(obj)); 1427 1428 /* check for the special case of zero length format spec, make 1429 it equivalent to str(obj) */ 1430 if (start == end) { 1431 if (PyUnicode_CheckExact(obj)) 1432 return _PyUnicodeWriter_WriteStr(writer, obj); 1433 else 1434 return format_obj(obj, writer); 1435 } 1436 1437 /* parse the format_spec */ 1438 if (!parse_internal_render_format_spec(format_spec, start, end, 1439 &format, 's', '<')) 1440 return -1; 1441 1442 /* type conversion? */ 1443 switch (format.type) { 1444 case 's': 1445 /* no type conversion needed, already a string. do the formatting */ 1446 return format_string_internal(obj, &format, writer); 1447 default: 1448 /* unknown */ 1449 unknown_presentation_type(format.type, obj->ob_type->tp_name); 1450 return -1; 1451 } 1452 } 1453 1454 int 1455 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer, 1456 PyObject *obj, 1457 PyObject *format_spec, 1458 Py_ssize_t start, Py_ssize_t end) 1459 { 1460 PyObject *tmp = NULL, *str = NULL; 1461 InternalFormatSpec format; 1462 int result = -1; 1463 1464 /* check for the special case of zero length format spec, make 1465 it equivalent to str(obj) */ 1466 if (start == end) { 1467 if (PyLong_CheckExact(obj)) 1468 return _PyLong_FormatWriter(writer, obj, 10, 0); 1469 else 1470 return format_obj(obj, writer); 1471 } 1472 1473 /* parse the format_spec */ 1474 if (!parse_internal_render_format_spec(format_spec, start, end, 1475 &format, 'd', '>')) 1476 goto done; 1477 1478 /* type conversion? */ 1479 switch (format.type) { 1480 case 'b': 1481 case 'c': 1482 case 'd': 1483 case 'o': 1484 case 'x': 1485 case 'X': 1486 case 'n': 1487 /* no type conversion needed, already an int. do the formatting */ 1488 result = format_long_internal(obj, &format, writer); 1489 break; 1490 1491 case 'e': 1492 case 'E': 1493 case 'f': 1494 case 'F': 1495 case 'g': 1496 case 'G': 1497 case '%': 1498 /* convert to float */ 1499 tmp = PyNumber_Float(obj); 1500 if (tmp == NULL) 1501 goto done; 1502 result = format_float_internal(tmp, &format, writer); 1503 break; 1504 1505 default: 1506 /* unknown */ 1507 unknown_presentation_type(format.type, obj->ob_type->tp_name); 1508 goto done; 1509 } 1510 1511 done: 1512 Py_XDECREF(tmp); 1513 Py_XDECREF(str); 1514 return result; 1515 } 1516 1517 int 1518 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer, 1519 PyObject *obj, 1520 PyObject *format_spec, 1521 Py_ssize_t start, Py_ssize_t end) 1522 { 1523 InternalFormatSpec format; 1524 1525 /* check for the special case of zero length format spec, make 1526 it equivalent to str(obj) */ 1527 if (start == end) 1528 return format_obj(obj, writer); 1529 1530 /* parse the format_spec */ 1531 if (!parse_internal_render_format_spec(format_spec, start, end, 1532 &format, '\0', '>')) 1533 return -1; 1534 1535 /* type conversion? */ 1536 switch (format.type) { 1537 case '\0': /* No format code: like 'g', but with at least one decimal. */ 1538 case 'e': 1539 case 'E': 1540 case 'f': 1541 case 'F': 1542 case 'g': 1543 case 'G': 1544 case 'n': 1545 case '%': 1546 /* no conversion, already a float. do the formatting */ 1547 return format_float_internal(obj, &format, writer); 1548 1549 default: 1550 /* unknown */ 1551 unknown_presentation_type(format.type, obj->ob_type->tp_name); 1552 return -1; 1553 } 1554 } 1555 1556 int 1557 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer, 1558 PyObject *obj, 1559 PyObject *format_spec, 1560 Py_ssize_t start, Py_ssize_t end) 1561 { 1562 InternalFormatSpec format; 1563 1564 /* check for the special case of zero length format spec, make 1565 it equivalent to str(obj) */ 1566 if (start == end) 1567 return format_obj(obj, writer); 1568 1569 /* parse the format_spec */ 1570 if (!parse_internal_render_format_spec(format_spec, start, end, 1571 &format, '\0', '>')) 1572 return -1; 1573 1574 /* type conversion? */ 1575 switch (format.type) { 1576 case '\0': /* No format code: like 'g', but with at least one decimal. */ 1577 case 'e': 1578 case 'E': 1579 case 'f': 1580 case 'F': 1581 case 'g': 1582 case 'G': 1583 case 'n': 1584 /* no conversion, already a complex. do the formatting */ 1585 return format_complex_internal(obj, &format, writer); 1586 1587 default: 1588 /* unknown */ 1589 unknown_presentation_type(format.type, obj->ob_type->tp_name); 1590 return -1; 1591 } 1592 } 1593