Home | History | Annotate | Download | only in Python
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
      4 
      5 #include "Python.h"
      6 #include <locale.h>
      7 
      8 /* Raises an exception about an unknown presentation type for this
      9  * type. */
     10 
     11 static void
     12 unknown_presentation_type(Py_UCS4 presentation_type,
     13                           const char* type_name)
     14 {
     15     /* %c might be out-of-range, hence the two cases. */
     16     if (presentation_type > 32 && presentation_type < 128)
     17         PyErr_Format(PyExc_ValueError,
     18                      "Unknown format code '%c' "
     19                      "for object of type '%.200s'",
     20                      (char)presentation_type,
     21                      type_name);
     22     else
     23         PyErr_Format(PyExc_ValueError,
     24                      "Unknown format code '\\x%x' "
     25                      "for object of type '%.200s'",
     26                      (unsigned int)presentation_type,
     27                      type_name);
     28 }
     29 
     30 static void
     31 invalid_comma_type(Py_UCS4 presentation_type)
     32 {
     33     if (presentation_type > 32 && presentation_type < 128)
     34         PyErr_Format(PyExc_ValueError,
     35                      "Cannot specify ',' or '_' with '%c'.",
     36                      (char)presentation_type);
     37     else
     38         PyErr_Format(PyExc_ValueError,
     39                      "Cannot specify ',' or '_' with '\\x%x'.",
     40                      (unsigned int)presentation_type);
     41 }
     42 
     43 static void
     44 invalid_comma_and_underscore(void)
     45 {
     46     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
     47 }
     48 
     49 /*
     50     get_integer consumes 0 or more decimal digit characters from an
     51     input string, updates *result with the corresponding positive
     52     integer, and returns the number of digits consumed.
     53 
     54     returns -1 on error.
     55 */
     56 static int
     57 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
     58                   Py_ssize_t *result)
     59 {
     60     Py_ssize_t accumulator, digitval, pos = *ppos;
     61     int numdigits;
     62     int kind = PyUnicode_KIND(str);
     63     void *data = PyUnicode_DATA(str);
     64 
     65     accumulator = numdigits = 0;
     66     for (; pos < end; pos++, numdigits++) {
     67         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
     68         if (digitval < 0)
     69             break;
     70         /*
     71            Detect possible overflow before it happens:
     72 
     73               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
     74               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
     75         */
     76         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
     77             PyErr_Format(PyExc_ValueError,
     78                          "Too many decimal digits in format string");
     79             *ppos = pos;
     80             return -1;
     81         }
     82         accumulator = accumulator * 10 + digitval;
     83     }
     84     *ppos = pos;
     85     *result = accumulator;
     86     return numdigits;
     87 }
     88 
     89 /************************************************************************/
     90 /*********** standard format specifier parsing **************************/
     91 /************************************************************************/
     92 
     93 /* returns true if this character is a specifier alignment token */
     94 Py_LOCAL_INLINE(int)
     95 is_alignment_token(Py_UCS4 c)
     96 {
     97     switch (c) {
     98     case '<': case '>': case '=': case '^':
     99         return 1;
    100     default:
    101         return 0;
    102     }
    103 }
    104 
    105 /* returns true if this character is a sign element */
    106 Py_LOCAL_INLINE(int)
    107 is_sign_element(Py_UCS4 c)
    108 {
    109     switch (c) {
    110     case ' ': case '+': case '-':
    111         return 1;
    112     default:
    113         return 0;
    114     }
    115 }
    116 
    117 /* Locale type codes. LT_NO_LOCALE must be zero. */
    118 enum LocaleType {
    119     LT_NO_LOCALE = 0,
    120     LT_DEFAULT_LOCALE,
    121     LT_UNDERSCORE_LOCALE,
    122     LT_UNDER_FOUR_LOCALE,
    123     LT_CURRENT_LOCALE
    124 };
    125 
/* The parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    Py_UCS4 fill_char;      /* padding character; ' ' unless given, or
                               '0' when the legacy '0' flag is used */
    Py_UCS4 align;          /* '<', '>', '=' or '^'; the caller's
                               default when not given */
    int alternate;          /* 1 if '#' was given, else 0 */
    Py_UCS4 sign;           /* ' ', '+' or '-'; '\0' when not given */
    Py_ssize_t width;       /* minimum field width; -1 when not given */
    enum LocaleType thousands_separators;  /* LT_NO_LOCALE when neither
                                              ',' nor '_' was given */
    Py_ssize_t precision;   /* digits after '.'; -1 when not given */
    Py_UCS4 type;           /* presentation type character; the caller's
                               default when not given */
} InternalFormatSpec;
    136 
    137 #if 0
    138 /* Occasionally useful for debugging. Should normally be commented out. */
    139 static void
    140 DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
    141 {
    142     printf("internal format spec: fill_char %d\n", format->fill_char);
    143     printf("internal format spec: align %d\n", format->align);
    144     printf("internal format spec: alternate %d\n", format->alternate);
    145     printf("internal format spec: sign %d\n", format->sign);
    146     printf("internal format spec: width %zd\n", format->width);
    147     printf("internal format spec: thousands_separators %d\n",
    148            format->thousands_separators);
    149     printf("internal format spec: precision %zd\n", format->precision);
    150     printf("internal format spec: type %c\n", format->type);
    151     printf("\n");
    152 }
    153 #endif
    154 
    155 
/*
  Parse the characters of format_spec in the half-open index range
  [start, end) as a standard format specifier and store the result in
  *format.

  The fields are consumed in this order: optional fill and alignment,
  sign, '#', '0', width, ',' and/or '_', '.' precision, and finally a
  single type character.  Fields that are absent keep the defaults
  assigned at the top of the function; default_type and default_align
  supply the defaults for the type and alignment fields.

  returns 1 on success, 0 on failure.
  if failure, sets the exception
*/
static int
parse_internal_render_format_spec(PyObject *format_spec,
                                  Py_ssize_t start, Py_ssize_t end,
                                  InternalFormatSpec *format,
                                  char default_type,
                                  char default_align)
{
    Py_ssize_t pos = start;
    int kind = PyUnicode_KIND(format_spec);
    void *data = PyUnicode_DATA(format_spec);
    /* end-pos is used throughout this code to specify the length of
       the input string */
#define READ_spec(index) PyUnicode_READ(kind, data, index)

    Py_ssize_t consumed;
    int align_specified = 0;
    int fill_char_specified = 0;

    /* Defaults for every field; overwritten below as fields are found. */
    format->fill_char = ' ';
    format->align = default_align;
    format->alternate = 0;
    format->sign = '\0';
    format->width = -1;
    format->thousands_separators = LT_NO_LOCALE;
    format->precision = -1;
    format->type = default_type;

    /* If the second char is an alignment token,
       then parse the fill char */
    if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
        format->align = READ_spec(pos+1);
        format->fill_char = READ_spec(pos);
        fill_char_specified = 1;
        align_specified = 1;
        pos += 2;
    }
    else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
        format->align = READ_spec(pos);
        align_specified = 1;
        ++pos;
    }

    /* Parse the various sign options */
    if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
        format->sign = READ_spec(pos);
        ++pos;
    }

    /* If the next character is #, we're in alternate mode.  This only
       applies to integers. */
    if (end-pos >= 1 && READ_spec(pos) == '#') {
        format->alternate = 1;
        ++pos;
    }

    /* The special case for 0-padding (backwards compat).  An explicit
       fill character takes precedence, and an explicit alignment is
       kept; otherwise '0' implies '=' alignment. */
    if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
        format->fill_char = '0';
        if (!align_specified) {
            format->align = '=';
        }
        ++pos;
    }

    consumed = get_integer(format_spec, &pos, end, &format->width);
    if (consumed == -1)
        /* Overflow error. Exception already set. */
        return 0;

    /* If consumed is 0, we didn't consume any characters for the
       width. In that case, reset the width to -1, because
       get_integer() will have set it to zero. -1 is how we record
       that the width wasn't specified. */
    if (consumed == 0)
        format->width = -1;

    /* Comma signifies add thousands separators */
    if (end-pos && READ_spec(pos) == ',') {
        format->thousands_separators = LT_DEFAULT_LOCALE;
        ++pos;
    }
    /* Underscore signifies add thousands separators */
    if (end-pos && READ_spec(pos) == '_') {
        if (format->thousands_separators != LT_NO_LOCALE) {
            /* A ',' was already seen just before this '_'. */
            invalid_comma_and_underscore();
            return 0;
        }
        format->thousands_separators = LT_UNDERSCORE_LOCALE;
        ++pos;
    }
    /* A ',' at this point is either a second ',' or one following a
       '_'; both are reported with the same error. */
    if (end-pos && READ_spec(pos) == ',') {
        invalid_comma_and_underscore();
        return 0;
    }

    /* Parse field precision */
    if (end-pos && READ_spec(pos) == '.') {
        ++pos;

        consumed = get_integer(format_spec, &pos, end, &format->precision);
        if (consumed == -1)
            /* Overflow error. Exception already set. */
            return 0;

        /* Not having a precision after a dot is an error. */
        if (consumed == 0) {
            PyErr_Format(PyExc_ValueError,
                         "Format specifier missing precision");
            return 0;
        }

    }

    /* Finally, parse the type field. */

    if (end-pos > 1) {
        /* More than one char remain, invalid format specifier. */
        PyErr_Format(PyExc_ValueError, "Invalid format specifier");
        return 0;
    }

    if (end-pos == 1) {
        format->type = READ_spec(pos);
        ++pos;
    }

    /* Do as much validating as we can, just by looking at the format
       specifier.  Do not take into account what type of formatting
       we're doing (int, float, string). */

    if (format->thousands_separators) {
        switch (format->type) {
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'E':
        case 'G':
        case '%':
        case 'F':
        case '\0':
            /* These are allowed. See PEP 378.*/
            break;
        case 'b':
        case 'o':
        case 'x':
        case 'X':
            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
                /* Every four digits, not every three, in bin/oct/hex. */
                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
                break;
            }
            /* fall through: ',' is not allowed with bin/oct/hex */
        default:
            invalid_comma_type(format->type);
            return 0;
        }
    }

    assert (format->align <= 127);
    assert (format->sign <= 127);
    return 1;
}
    325 
    326 /* Calculate the padding needed. */
    327 static void
    328 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
    329              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
    330              Py_ssize_t *n_total)
    331 {
    332     if (width >= 0) {
    333         if (nchars > width)
    334             *n_total = nchars;
    335         else
    336             *n_total = width;
    337     }
    338     else {
    339         /* not specified, use all of the chars and no more */
    340         *n_total = nchars;
    341     }
    342 
    343     /* Figure out how much leading space we need, based on the
    344        aligning */
    345     if (align == '>')
    346         *n_lpadding = *n_total - nchars;
    347     else if (align == '^')
    348         *n_lpadding = (*n_total - nchars) / 2;
    349     else if (align == '<' || align == '=')
    350         *n_lpadding = 0;
    351     else {
    352         /* We should never have an unspecified alignment. */
    353         *n_lpadding = 0;
    354         assert(0);
    355     }
    356 
    357     *n_rpadding = *n_total - nchars - *n_lpadding;
    358 }
    359 
    360 /* Do the padding, and return a pointer to where the caller-supplied
    361    content goes. */
    362 static int
    363 fill_padding(_PyUnicodeWriter *writer,
    364              Py_ssize_t nchars,
    365              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
    366              Py_ssize_t n_rpadding)
    367 {
    368     Py_ssize_t pos;
    369 
    370     /* Pad on left. */
    371     if (n_lpadding) {
    372         pos = writer->pos;
    373         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
    374     }
    375 
    376     /* Pad on right. */
    377     if (n_rpadding) {
    378         pos = writer->pos + nchars + n_lpadding;
    379         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
    380     }
    381 
    382     /* Pointer to the user content. */
    383     writer->pos += n_lpadding;
    384     return 0;
    385 }
    386 
    387 /************************************************************************/
    388 /*********** common routines for numeric formatting *********************/
    389 /************************************************************************/
    390 
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal.  get_locale_info() fills this in
   and free_locale_info() releases the two object references.  For the
   current locale the C library reports the separators as char strings;
   get_locale_info() decodes them into the objects stored here. */
typedef struct {
    PyObject *decimal_point;    /* string written in place of '.' */
    PyObject *thousands_sep;    /* string inserted between digit groups */
    const char *grouping;       /* group sizes, passed on to
                                   _PyUnicode_InsertThousandsGrouping();
                                   not released by free_locale_info() */
} LocaleInfo;

/* All-zero initializer: both object pointers start out NULL, which
   free_locale_info() accepts because it uses Py_XDECREF. */
#define STATIC_LOCALE_INFO_INIT {0, 0, 0}
    401 
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  The output is laid out as
     <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal>
     <remainder> <rpadding>
   and each n_* field below is the width of the matching part. */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill characters before the sign */
    Py_ssize_t n_prefix;    /* width of the prefix (e.g. '0x') */
    Py_ssize_t n_spadding;  /* fill characters between the prefix and
                               the digits (used for '=' alignment) */
    Py_ssize_t n_rpadding;  /* fill characters after the number */
    char sign;              /* sign character to write; '\0' if none */
    Py_ssize_t n_sign;      /* number of characters needed for sign (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* 0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are needed by
       STRINGLIB_GROUPING. */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
    425 
    426 
    427 /* Given a number of the form:
    428    digits[remainder]
    429    where ptr points to the start and end points to the end, find where
    430     the integer part ends. This could be a decimal, an exponent, both,
    431     or neither.
    432    If a decimal point is present, set *has_decimal and increment
    433     remainder beyond it.
    434    Results are undefined (but shouldn't crash) for improperly
    435     formatted strings.
    436 */
    437 static void
    438 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
    439              Py_ssize_t *n_remainder, int *has_decimal)
    440 {
    441     Py_ssize_t remainder;
    442     int kind = PyUnicode_KIND(s);
    443     void *data = PyUnicode_DATA(s);
    444 
    445     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
    446         ++pos;
    447     remainder = pos;
    448 
    449     /* Does remainder start with a decimal point? */
    450     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
    451 
    452     /* Skip the decimal point. */
    453     if (*has_decimal)
    454         remainder++;
    455 
    456     *n_remainder = end - remainder;
    457 }
    458 
    459 /* not all fields of format are used.  for example, precision is
    460    unused.  should this take discrete params in order to be more clear
    461    about what it does?  or is passing a single format parameter easier
    462    and more efficient enough to justify a little obfuscation? */
    463 static Py_ssize_t
    464 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
    465                    Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
    466                    Py_ssize_t n_end, Py_ssize_t n_remainder,
    467                    int has_decimal, const LocaleInfo *locale,
    468                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
    469 {
    470     Py_ssize_t n_non_digit_non_padding;
    471     Py_ssize_t n_padding;
    472 
    473     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
    474     spec->n_lpadding = 0;
    475     spec->n_prefix = n_prefix;
    476     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
    477     spec->n_remainder = n_remainder;
    478     spec->n_spadding = 0;
    479     spec->n_rpadding = 0;
    480     spec->sign = '\0';
    481     spec->n_sign = 0;
    482 
    483     /* the output will look like:
    484        |                                                                                         |
    485        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
    486        |                                                                                         |
    487 
    488        sign is computed from format->sign and the actual
    489        sign of the number
    490 
    491        prefix is given (it's for the '0x' prefix)
    492 
    493        digits is already known
    494 
    495        the total width is either given, or computed from the
    496        actual digits
    497 
    498        only one of lpadding, spadding, and rpadding can be non-zero,
    499        and it's calculated from the width and other fields
    500     */
    501 
    502     /* compute the various parts we're going to write */
    503     switch (format->sign) {
    504     case '+':
    505         /* always put a + or - */
    506         spec->n_sign = 1;
    507         spec->sign = (sign_char == '-' ? '-' : '+');
    508         break;
    509     case ' ':
    510         spec->n_sign = 1;
    511         spec->sign = (sign_char == '-' ? '-' : ' ');
    512         break;
    513     default:
    514         /* Not specified, or the default (-) */
    515         if (sign_char == '-') {
    516             spec->n_sign = 1;
    517             spec->sign = '-';
    518         }
    519     }
    520 
    521     /* The number of chars used for non-digits and non-padding. */
    522     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
    523         spec->n_remainder;
    524 
    525     /* min_width can go negative, that's okay. format->width == -1 means
    526        we don't care. */
    527     if (format->fill_char == '0' && format->align == '=')
    528         spec->n_min_width = format->width - n_non_digit_non_padding;
    529     else
    530         spec->n_min_width = 0;
    531 
    532     if (spec->n_digits == 0)
    533         /* This case only occurs when using 'c' formatting, we need
    534            to special case it because the grouping code always wants
    535            to have at least one character. */
    536         spec->n_grouped_digits = 0;
    537     else {
    538         Py_UCS4 grouping_maxchar;
    539         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
    540             NULL, 0,
    541             0, NULL,
    542             spec->n_digits, spec->n_min_width,
    543             locale->grouping, locale->thousands_sep, &grouping_maxchar);
    544         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
    545     }
    546 
    547     /* Given the desired width and the total of digit and non-digit
    548        space we consume, see if we need any padding. format->width can
    549        be negative (meaning no padding), but this code still works in
    550        that case. */
    551     n_padding = format->width -
    552                         (n_non_digit_non_padding + spec->n_grouped_digits);
    553     if (n_padding > 0) {
    554         /* Some padding is needed. Determine if it's left, space, or right. */
    555         switch (format->align) {
    556         case '<':
    557             spec->n_rpadding = n_padding;
    558             break;
    559         case '^':
    560             spec->n_lpadding = n_padding / 2;
    561             spec->n_rpadding = n_padding - spec->n_lpadding;
    562             break;
    563         case '=':
    564             spec->n_spadding = n_padding;
    565             break;
    566         case '>':
    567             spec->n_lpadding = n_padding;
    568             break;
    569         default:
    570             /* Shouldn't get here, but treat it as '>' */
    571             spec->n_lpadding = n_padding;
    572             assert(0);
    573             break;
    574         }
    575     }
    576 
    577     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
    578         *maxchar = Py_MAX(*maxchar, format->fill_char);
    579 
    580     if (spec->n_decimal)
    581         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
    582 
    583     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
    584         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
    585         spec->n_remainder + spec->n_rpadding;
    586 }
    587 
    588 /* Fill in the digit parts of a numbers's string representation,
    589    as determined in calc_number_widths().
    590    Return -1 on error, or 0 on success. */
    591 static int
    592 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
    593             PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
    594             PyObject *prefix, Py_ssize_t p_start,
    595             Py_UCS4 fill_char,
    596             LocaleInfo *locale, int toupper)
    597 {
    598     /* Used to keep track of digits, decimal, and remainder. */
    599     Py_ssize_t d_pos = d_start;
    600     const unsigned int kind = writer->kind;
    601     const void *data = writer->data;
    602     Py_ssize_t r;
    603 
    604     if (spec->n_lpadding) {
    605         _PyUnicode_FastFill(writer->buffer,
    606                             writer->pos, spec->n_lpadding, fill_char);
    607         writer->pos += spec->n_lpadding;
    608     }
    609     if (spec->n_sign == 1) {
    610         PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
    611         writer->pos++;
    612     }
    613     if (spec->n_prefix) {
    614         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
    615                                       prefix, p_start,
    616                                       spec->n_prefix);
    617         if (toupper) {
    618             Py_ssize_t t;
    619             for (t = 0; t < spec->n_prefix; t++) {
    620                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
    621                 c = Py_TOUPPER(c);
    622                 assert (c <= 127);
    623                 PyUnicode_WRITE(kind, data, writer->pos + t, c);
    624             }
    625         }
    626         writer->pos += spec->n_prefix;
    627     }
    628     if (spec->n_spadding) {
    629         _PyUnicode_FastFill(writer->buffer,
    630                             writer->pos, spec->n_spadding, fill_char);
    631         writer->pos += spec->n_spadding;
    632     }
    633 
    634     /* Only for type 'c' special case, it has no digits. */
    635     if (spec->n_digits != 0) {
    636         /* Fill the digits with InsertThousandsGrouping. */
    637         char *pdigits;
    638         if (PyUnicode_READY(digits))
    639             return -1;
    640         pdigits = PyUnicode_DATA(digits);
    641         if (PyUnicode_KIND(digits) < kind) {
    642             pdigits = _PyUnicode_AsKind(digits, kind);
    643             if (pdigits == NULL)
    644                 return -1;
    645         }
    646         r = _PyUnicode_InsertThousandsGrouping(
    647                 writer->buffer, writer->pos,
    648                 spec->n_grouped_digits,
    649                 pdigits + kind * d_pos,
    650                 spec->n_digits, spec->n_min_width,
    651                 locale->grouping, locale->thousands_sep, NULL);
    652         if (r == -1)
    653             return -1;
    654         assert(r == spec->n_grouped_digits);
    655         if (PyUnicode_KIND(digits) < kind)
    656             PyMem_Free(pdigits);
    657         d_pos += spec->n_digits;
    658     }
    659     if (toupper) {
    660         Py_ssize_t t;
    661         for (t = 0; t < spec->n_grouped_digits; t++) {
    662             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
    663             c = Py_TOUPPER(c);
    664             if (c > 127) {
    665                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
    666                 return -1;
    667             }
    668             PyUnicode_WRITE(kind, data, writer->pos + t, c);
    669         }
    670     }
    671     writer->pos += spec->n_grouped_digits;
    672 
    673     if (spec->n_decimal) {
    674         _PyUnicode_FastCopyCharacters(
    675             writer->buffer, writer->pos,
    676             locale->decimal_point, 0, spec->n_decimal);
    677         writer->pos += spec->n_decimal;
    678         d_pos += 1;
    679     }
    680 
    681     if (spec->n_remainder) {
    682         _PyUnicode_FastCopyCharacters(
    683             writer->buffer, writer->pos,
    684             digits, d_pos, spec->n_remainder);
    685         writer->pos += spec->n_remainder;
    686         /* d_pos += spec->n_remainder; */
    687     }
    688 
    689     if (spec->n_rpadding) {
    690         _PyUnicode_FastFill(writer->buffer,
    691                             writer->pos, spec->n_rpadding,
    692                             fill_char);
    693         writer->pos += spec->n_rpadding;
    694     }
    695     return 0;
    696 }
    697 
/* Grouping description that get_locale_info() hands out for
   LT_NO_LOCALE.  In the grouping format used by localeconv(), a
   CHAR_MAX element means that no (further) grouping is performed. */
static const char no_grouping[1] = {CHAR_MAX};
    699 
    700 /* Find the decimal point character(s?), thousands_separator(s?), and
    701    grouping description, either for the current locale if type is
    702    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
    703    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
    704 static int
    705 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
    706 {
    707     switch (type) {
    708     case LT_CURRENT_LOCALE: {
    709         struct lconv *locale_data = localeconv();
    710         locale_info->decimal_point = PyUnicode_DecodeLocale(
    711                                          locale_data->decimal_point,
    712                                          NULL);
    713         if (locale_info->decimal_point == NULL)
    714             return -1;
    715         locale_info->thousands_sep = PyUnicode_DecodeLocale(
    716                                          locale_data->thousands_sep,
    717                                          NULL);
    718         if (locale_info->thousands_sep == NULL)
    719             return -1;
    720         locale_info->grouping = locale_data->grouping;
    721         break;
    722     }
    723     case LT_DEFAULT_LOCALE:
    724     case LT_UNDERSCORE_LOCALE:
    725     case LT_UNDER_FOUR_LOCALE:
    726         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
    727         locale_info->thousands_sep = PyUnicode_FromOrdinal(
    728             type == LT_DEFAULT_LOCALE ? ',' : '_');
    729         if (!locale_info->decimal_point || !locale_info->thousands_sep)
    730             return -1;
    731         if (type != LT_UNDER_FOUR_LOCALE)
    732             locale_info->grouping = "\3"; /* Group every 3 characters.  The
    733                                          (implicit) trailing 0 means repeat
    734                                          infinitely. */
    735         else
    736             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
    737         break;
    738     case LT_NO_LOCALE:
    739         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
    740         locale_info->thousands_sep = PyUnicode_New(0, 0);
    741         if (!locale_info->decimal_point || !locale_info->thousands_sep)
    742             return -1;
    743         locale_info->grouping = no_grouping;
    744         break;
    745     }
    746     return 0;
    747 }
    748 
    749 static void
    750 free_locale_info(LocaleInfo *locale_info)
    751 {
    752     Py_XDECREF(locale_info->decimal_point);
    753     Py_XDECREF(locale_info->thousands_sep);
    754 }
    755 
    756 /************************************************************************/
    757 /*********** string formatting ******************************************/
    758 /************************************************************************/
    759 
    760 static int
    761 format_string_internal(PyObject *value, const InternalFormatSpec *format,
    762                        _PyUnicodeWriter *writer)
    763 {
    764     Py_ssize_t lpad;
    765     Py_ssize_t rpad;
    766     Py_ssize_t total;
    767     Py_ssize_t len;
    768     int result = -1;
    769     Py_UCS4 maxchar;
    770 
    771     assert(PyUnicode_IS_READY(value));
    772     len = PyUnicode_GET_LENGTH(value);
    773 
    774     /* sign is not allowed on strings */
    775     if (format->sign != '\0') {
    776         PyErr_SetString(PyExc_ValueError,
    777                         "Sign not allowed in string format specifier");
    778         goto done;
    779     }
    780 
    781     /* alternate is not allowed on strings */
    782     if (format->alternate) {
    783         PyErr_SetString(PyExc_ValueError,
    784                         "Alternate form (#) not allowed in string format "
    785                         "specifier");
    786         goto done;
    787     }
    788 
    789     /* '=' alignment not allowed on strings */
    790     if (format->align == '=') {
    791         PyErr_SetString(PyExc_ValueError,
    792                         "'=' alignment not allowed "
    793                         "in string format specifier");
    794         goto done;
    795     }
    796 
    797     if ((format->width == -1 || format->width <= len)
    798         && (format->precision == -1 || format->precision >= len)) {
    799         /* Fast path */
    800         return _PyUnicodeWriter_WriteStr(writer, value);
    801     }
    802 
    803     /* if precision is specified, output no more that format.precision
    804        characters */
    805     if (format->precision >= 0 && len >= format->precision) {
    806         len = format->precision;
    807     }
    808 
    809     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
    810 
    811     maxchar = writer->maxchar;
    812     if (lpad != 0 || rpad != 0)
    813         maxchar = Py_MAX(maxchar, format->fill_char);
    814     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
    815         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
    816         maxchar = Py_MAX(maxchar, valmaxchar);
    817     }
    818 
    819     /* allocate the resulting string */
    820     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
    821         goto done;
    822 
    823     /* Write into that space. First the padding. */
    824     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
    825     if (result == -1)
    826         goto done;
    827 
    828     /* Then the source string. */
    829     if (len) {
    830         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
    831                                       value, 0, len);
    832     }
    833     writer->pos += (len + rpad);
    834     result = 0;
    835 
    836 done:
    837     return result;
    838 }
    839 
    840 
    841 /************************************************************************/
    842 /*********** long formatting ********************************************/
    843 /************************************************************************/
    844 
    845 static int
    846 format_long_internal(PyObject *value, const InternalFormatSpec *format,
    847                      _PyUnicodeWriter *writer)
    848 {
    849     int result = -1;
    850     Py_UCS4 maxchar = 127;
    851     PyObject *tmp = NULL;
    852     Py_ssize_t inumeric_chars;
    853     Py_UCS4 sign_char = '\0';
    854     Py_ssize_t n_digits;       /* count of digits need from the computed
    855                                   string */
    856     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
    857                                    produces non-digits */
    858     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
    859     Py_ssize_t n_total;
    860     Py_ssize_t prefix = 0;
    861     NumberFieldWidths spec;
    862     long x;
    863 
    864     /* Locale settings, either from the actual locale or
    865        from a hard-code pseudo-locale */
    866     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
    867 
    868     /* no precision allowed on integers */
    869     if (format->precision != -1) {
    870         PyErr_SetString(PyExc_ValueError,
    871                         "Precision not allowed in integer format specifier");
    872         goto done;
    873     }
    874 
    875     /* special case for character formatting */
    876     if (format->type == 'c') {
    877         /* error to specify a sign */
    878         if (format->sign != '\0') {
    879             PyErr_SetString(PyExc_ValueError,
    880                             "Sign not allowed with integer"
    881                             " format specifier 'c'");
    882             goto done;
    883         }
    884         /* error to request alternate format */
    885         if (format->alternate) {
    886             PyErr_SetString(PyExc_ValueError,
    887                             "Alternate form (#) not allowed with integer"
    888                             " format specifier 'c'");
    889             goto done;
    890         }
    891 
    892         /* taken from unicodeobject.c formatchar() */
    893         /* Integer input truncated to a character */
    894         x = PyLong_AsLong(value);
    895         if (x == -1 && PyErr_Occurred())
    896             goto done;
    897         if (x < 0 || x > 0x10ffff) {
    898             PyErr_SetString(PyExc_OverflowError,
    899                             "%c arg not in range(0x110000)");
    900             goto done;
    901         }
    902         tmp = PyUnicode_FromOrdinal(x);
    903         inumeric_chars = 0;
    904         n_digits = 1;
    905         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
    906 
    907         /* As a sort-of hack, we tell calc_number_widths that we only
    908            have "remainder" characters. calc_number_widths thinks
    909            these are characters that don't get formatted, only copied
    910            into the output string. We do this for 'c' formatting,
    911            because the characters are likely to be non-digits. */
    912         n_remainder = 1;
    913     }
    914     else {
    915         int base;
    916         int leading_chars_to_skip = 0;  /* Number of characters added by
    917                                            PyNumber_ToBase that we want to
    918                                            skip over. */
    919 
    920         /* Compute the base and how many characters will be added by
    921            PyNumber_ToBase */
    922         switch (format->type) {
    923         case 'b':
    924             base = 2;
    925             leading_chars_to_skip = 2; /* 0b */
    926             break;
    927         case 'o':
    928             base = 8;
    929             leading_chars_to_skip = 2; /* 0o */
    930             break;
    931         case 'x':
    932         case 'X':
    933             base = 16;
    934             leading_chars_to_skip = 2; /* 0x */
    935             break;
    936         default:  /* shouldn't be needed, but stops a compiler warning */
    937         case 'd':
    938         case 'n':
    939             base = 10;
    940             break;
    941         }
    942 
    943         if (format->sign != '+' && format->sign != ' '
    944             && format->width == -1
    945             && format->type != 'X' && format->type != 'n'
    946             && !format->thousands_separators
    947             && PyLong_CheckExact(value))
    948         {
    949             /* Fast path */
    950             return _PyLong_FormatWriter(writer, value, base, format->alternate);
    951         }
    952 
    953         /* The number of prefix chars is the same as the leading
    954            chars to skip */
    955         if (format->alternate)
    956             n_prefix = leading_chars_to_skip;
    957 
    958         /* Do the hard part, converting to a string in a given base */
    959         tmp = _PyLong_Format(value, base);
    960         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
    961             goto done;
    962 
    963         inumeric_chars = 0;
    964         n_digits = PyUnicode_GET_LENGTH(tmp);
    965 
    966         prefix = inumeric_chars;
    967 
    968         /* Is a sign character present in the output?  If so, remember it
    969            and skip it */
    970         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
    971             sign_char = '-';
    972             ++prefix;
    973             ++leading_chars_to_skip;
    974         }
    975 
    976         /* Skip over the leading chars (0x, 0b, etc.) */
    977         n_digits -= leading_chars_to_skip;
    978         inumeric_chars += leading_chars_to_skip;
    979     }
    980 
    981     /* Determine the grouping, separator, and decimal point, if any. */
    982     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
    983                         format->thousands_separators,
    984                         &locale) == -1)
    985         goto done;
    986 
    987     /* Calculate how much memory we'll need. */
    988     n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
    989                                  inumeric_chars + n_digits, n_remainder, 0,
    990                                  &locale, format, &maxchar);
    991 
    992     /* Allocate the memory. */
    993     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
    994         goto done;
    995 
    996     /* Populate the memory. */
    997     result = fill_number(writer, &spec,
    998                          tmp, inumeric_chars, inumeric_chars + n_digits,
    999                          tmp, prefix, format->fill_char,
   1000                          &locale, format->type == 'X');
   1001 
   1002 done:
   1003     Py_XDECREF(tmp);
   1004     free_locale_info(&locale);
   1005     return result;
   1006 }
   1007 
   1008 /************************************************************************/
   1009 /*********** float formatting *******************************************/
   1010 /************************************************************************/
   1011 
   1012 /* much of this is taken from unicodeobject.c */
   1013 static int
   1014 format_float_internal(PyObject *value,
   1015                       const InternalFormatSpec *format,
   1016                       _PyUnicodeWriter *writer)
   1017 {
   1018     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
   1019     Py_ssize_t n_digits;
   1020     Py_ssize_t n_remainder;
   1021     Py_ssize_t n_total;
   1022     int has_decimal;
   1023     double val;
   1024     int precision, default_precision = 6;
   1025     Py_UCS4 type = format->type;
   1026     int add_pct = 0;
   1027     Py_ssize_t index;
   1028     NumberFieldWidths spec;
   1029     int flags = 0;
   1030     int result = -1;
   1031     Py_UCS4 maxchar = 127;
   1032     Py_UCS4 sign_char = '\0';
   1033     int float_type; /* Used to see if we have a nan, inf, or regular float. */
   1034     PyObject *unicode_tmp = NULL;
   1035 
   1036     /* Locale settings, either from the actual locale or
   1037        from a hard-code pseudo-locale */
   1038     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
   1039 
   1040     if (format->precision > INT_MAX) {
   1041         PyErr_SetString(PyExc_ValueError, "precision too big");
   1042         goto done;
   1043     }
   1044     precision = (int)format->precision;
   1045 
   1046     if (format->alternate)
   1047         flags |= Py_DTSF_ALT;
   1048 
   1049     if (type == '\0') {
   1050         /* Omitted type specifier.  Behaves in the same way as repr(x)
   1051            and str(x) if no precision is given, else like 'g', but with
   1052            at least one digit after the decimal point. */
   1053         flags |= Py_DTSF_ADD_DOT_0;
   1054         type = 'r';
   1055         default_precision = 0;
   1056     }
   1057 
   1058     if (type == 'n')
   1059         /* 'n' is the same as 'g', except for the locale used to
   1060            format the result. We take care of that later. */
   1061         type = 'g';
   1062 
   1063     val = PyFloat_AsDouble(value);
   1064     if (val == -1.0 && PyErr_Occurred())
   1065         goto done;
   1066 
   1067     if (type == '%') {
   1068         type = 'f';
   1069         val *= 100;
   1070         add_pct = 1;
   1071     }
   1072 
   1073     if (precision < 0)
   1074         precision = default_precision;
   1075     else if (type == 'r')
   1076         type = 'g';
   1077 
   1078     /* Cast "type", because if we're in unicode we need to pass an
   1079        8-bit char. This is safe, because we've restricted what "type"
   1080        can be. */
   1081     buf = PyOS_double_to_string(val, (char)type, precision, flags,
   1082                                 &float_type);
   1083     if (buf == NULL)
   1084         goto done;
   1085     n_digits = strlen(buf);
   1086 
   1087     if (add_pct) {
   1088         /* We know that buf has a trailing zero (since we just called
   1089            strlen() on it), and we don't use that fact any more. So we
   1090            can just write over the trailing zero. */
   1091         buf[n_digits] = '%';
   1092         n_digits += 1;
   1093     }
   1094 
   1095     if (format->sign != '+' && format->sign != ' '
   1096         && format->width == -1
   1097         && format->type != 'n'
   1098         && !format->thousands_separators)
   1099     {
   1100         /* Fast path */
   1101         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
   1102         PyMem_Free(buf);
   1103         return result;
   1104     }
   1105 
   1106     /* Since there is no unicode version of PyOS_double_to_string,
   1107        just use the 8 bit version and then convert to unicode. */
   1108     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
   1109     PyMem_Free(buf);
   1110     if (unicode_tmp == NULL)
   1111         goto done;
   1112 
   1113     /* Is a sign character present in the output?  If so, remember it
   1114        and skip it */
   1115     index = 0;
   1116     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
   1117         sign_char = '-';
   1118         ++index;
   1119         --n_digits;
   1120     }
   1121 
   1122     /* Determine if we have any "remainder" (after the digits, might include
   1123        decimal or exponent or both (or neither)) */
   1124     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
   1125 
   1126     /* Determine the grouping, separator, and decimal point, if any. */
   1127     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
   1128                         format->thousands_separators,
   1129                         &locale) == -1)
   1130         goto done;
   1131 
   1132     /* Calculate how much memory we'll need. */
   1133     n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
   1134                                  index + n_digits, n_remainder, has_decimal,
   1135                                  &locale, format, &maxchar);
   1136 
   1137     /* Allocate the memory. */
   1138     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
   1139         goto done;
   1140 
   1141     /* Populate the memory. */
   1142     result = fill_number(writer, &spec,
   1143                          unicode_tmp, index, index + n_digits,
   1144                          NULL, 0, format->fill_char,
   1145                          &locale, 0);
   1146 
   1147 done:
   1148     Py_XDECREF(unicode_tmp);
   1149     free_locale_info(&locale);
   1150     return result;
   1151 }
   1152 
   1153 /************************************************************************/
   1154 /*********** complex formatting *****************************************/
   1155 /************************************************************************/
   1156 
   1157 static int
   1158 format_complex_internal(PyObject *value,
   1159                         const InternalFormatSpec *format,
   1160                         _PyUnicodeWriter *writer)
   1161 {
   1162     double re;
   1163     double im;
   1164     char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
   1165     char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
   1166 
   1167     InternalFormatSpec tmp_format = *format;
   1168     Py_ssize_t n_re_digits;
   1169     Py_ssize_t n_im_digits;
   1170     Py_ssize_t n_re_remainder;
   1171     Py_ssize_t n_im_remainder;
   1172     Py_ssize_t n_re_total;
   1173     Py_ssize_t n_im_total;
   1174     int re_has_decimal;
   1175     int im_has_decimal;
   1176     int precision, default_precision = 6;
   1177     Py_UCS4 type = format->type;
   1178     Py_ssize_t i_re;
   1179     Py_ssize_t i_im;
   1180     NumberFieldWidths re_spec;
   1181     NumberFieldWidths im_spec;
   1182     int flags = 0;
   1183     int result = -1;
   1184     Py_UCS4 maxchar = 127;
   1185     enum PyUnicode_Kind rkind;
   1186     void *rdata;
   1187     Py_UCS4 re_sign_char = '\0';
   1188     Py_UCS4 im_sign_char = '\0';
   1189     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
   1190     int im_float_type;
   1191     int add_parens = 0;
   1192     int skip_re = 0;
   1193     Py_ssize_t lpad;
   1194     Py_ssize_t rpad;
   1195     Py_ssize_t total;
   1196     PyObject *re_unicode_tmp = NULL;
   1197     PyObject *im_unicode_tmp = NULL;
   1198 
   1199     /* Locale settings, either from the actual locale or
   1200        from a hard-code pseudo-locale */
   1201     LocaleInfo locale = STATIC_LOCALE_INFO_INIT;
   1202 
   1203     if (format->precision > INT_MAX) {
   1204         PyErr_SetString(PyExc_ValueError, "precision too big");
   1205         goto done;
   1206     }
   1207     precision = (int)format->precision;
   1208 
   1209     /* Zero padding is not allowed. */
   1210     if (format->fill_char == '0') {
   1211         PyErr_SetString(PyExc_ValueError,
   1212                         "Zero padding is not allowed in complex format "
   1213                         "specifier");
   1214         goto done;
   1215     }
   1216 
   1217     /* Neither is '=' alignment . */
   1218     if (format->align == '=') {
   1219         PyErr_SetString(PyExc_ValueError,
   1220                         "'=' alignment flag is not allowed in complex format "
   1221                         "specifier");
   1222         goto done;
   1223     }
   1224 
   1225     re = PyComplex_RealAsDouble(value);
   1226     if (re == -1.0 && PyErr_Occurred())
   1227         goto done;
   1228     im = PyComplex_ImagAsDouble(value);
   1229     if (im == -1.0 && PyErr_Occurred())
   1230         goto done;
   1231 
   1232     if (format->alternate)
   1233         flags |= Py_DTSF_ALT;
   1234 
   1235     if (type == '\0') {
   1236         /* Omitted type specifier. Should be like str(self). */
   1237         type = 'r';
   1238         default_precision = 0;
   1239         if (re == 0.0 && copysign(1.0, re) == 1.0)
   1240             skip_re = 1;
   1241         else
   1242             add_parens = 1;
   1243     }
   1244 
   1245     if (type == 'n')
   1246         /* 'n' is the same as 'g', except for the locale used to
   1247            format the result. We take care of that later. */
   1248         type = 'g';
   1249 
   1250     if (precision < 0)
   1251         precision = default_precision;
   1252     else if (type == 'r')
   1253         type = 'g';
   1254 
   1255     /* Cast "type", because if we're in unicode we need to pass an
   1256        8-bit char. This is safe, because we've restricted what "type"
   1257        can be. */
   1258     re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
   1259                                    &re_float_type);
   1260     if (re_buf == NULL)
   1261         goto done;
   1262     im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
   1263                                    &im_float_type);
   1264     if (im_buf == NULL)
   1265         goto done;
   1266 
   1267     n_re_digits = strlen(re_buf);
   1268     n_im_digits = strlen(im_buf);
   1269 
   1270     /* Since there is no unicode version of PyOS_double_to_string,
   1271        just use the 8 bit version and then convert to unicode. */
   1272     re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
   1273     if (re_unicode_tmp == NULL)
   1274         goto done;
   1275     i_re = 0;
   1276 
   1277     im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
   1278     if (im_unicode_tmp == NULL)
   1279         goto done;
   1280     i_im = 0;
   1281 
   1282     /* Is a sign character present in the output?  If so, remember it
   1283        and skip it */
   1284     if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
   1285         re_sign_char = '-';
   1286         ++i_re;
   1287         --n_re_digits;
   1288     }
   1289     if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
   1290         im_sign_char = '-';
   1291         ++i_im;
   1292         --n_im_digits;
   1293     }
   1294 
   1295     /* Determine if we have any "remainder" (after the digits, might include
   1296        decimal or exponent or both (or neither)) */
   1297     parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
   1298                  &n_re_remainder, &re_has_decimal);
   1299     parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
   1300                  &n_im_remainder, &im_has_decimal);
   1301 
   1302     /* Determine the grouping, separator, and decimal point, if any. */
   1303     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
   1304                         format->thousands_separators,
   1305                         &locale) == -1)
   1306         goto done;
   1307 
   1308     /* Turn off any padding. We'll do it later after we've composed
   1309        the numbers without padding. */
   1310     tmp_format.fill_char = '\0';
   1311     tmp_format.align = '<';
   1312     tmp_format.width = -1;
   1313 
   1314     /* Calculate how much memory we'll need. */
   1315     n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
   1316                                     i_re, i_re + n_re_digits, n_re_remainder,
   1317                                     re_has_decimal, &locale, &tmp_format,
   1318                                     &maxchar);
   1319 
   1320     /* Same formatting, but always include a sign, unless the real part is
   1321      * going to be omitted, in which case we use whatever sign convention was
   1322      * requested by the original format. */
   1323     if (!skip_re)
   1324         tmp_format.sign = '+';
   1325     n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
   1326                                     i_im, i_im + n_im_digits, n_im_remainder,
   1327                                     im_has_decimal, &locale, &tmp_format,
   1328                                     &maxchar);
   1329 
   1330     if (skip_re)
   1331         n_re_total = 0;
   1332 
   1333     /* Add 1 for the 'j', and optionally 2 for parens. */
   1334     calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
   1335                  format->width, format->align, &lpad, &rpad, &total);
   1336 
   1337     if (lpad || rpad)
   1338         maxchar = Py_MAX(maxchar, format->fill_char);
   1339 
   1340     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
   1341         goto done;
   1342     rkind = writer->kind;
   1343     rdata = writer->data;
   1344 
   1345     /* Populate the memory. First, the padding. */
   1346     result = fill_padding(writer,
   1347                           n_re_total + n_im_total + 1 + add_parens * 2,
   1348                           format->fill_char, lpad, rpad);
   1349     if (result == -1)
   1350         goto done;
   1351 
   1352     if (add_parens) {
   1353         PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
   1354         writer->pos++;
   1355     }
   1356 
   1357     if (!skip_re) {
   1358         result = fill_number(writer, &re_spec,
   1359                              re_unicode_tmp, i_re, i_re + n_re_digits,
   1360                              NULL, 0,
   1361                              0,
   1362                              &locale, 0);
   1363         if (result == -1)
   1364             goto done;
   1365     }
   1366     result = fill_number(writer, &im_spec,
   1367                          im_unicode_tmp, i_im, i_im + n_im_digits,
   1368                          NULL, 0,
   1369                          0,
   1370                          &locale, 0);
   1371     if (result == -1)
   1372         goto done;
   1373     PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
   1374     writer->pos++;
   1375 
   1376     if (add_parens) {
   1377         PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
   1378         writer->pos++;
   1379     }
   1380 
   1381     writer->pos += rpad;
   1382 
   1383 done:
   1384     PyMem_Free(re_buf);
   1385     PyMem_Free(im_buf);
   1386     Py_XDECREF(re_unicode_tmp);
   1387     Py_XDECREF(im_unicode_tmp);
   1388     free_locale_info(&locale);
   1389     return result;
   1390 }
   1391 
   1392 /************************************************************************/
   1393 /*********** built in formatters ****************************************/
   1394 /************************************************************************/
   1395 static int
   1396 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
   1397 {
   1398     PyObject *str;
   1399     int err;
   1400 
   1401     str = PyObject_Str(obj);
   1402     if (str == NULL)
   1403         return -1;
   1404     err = _PyUnicodeWriter_WriteStr(writer, str);
   1405     Py_DECREF(str);
   1406     return err;
   1407 }
   1408 
   1409 int
   1410 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
   1411                                 PyObject *obj,
   1412                                 PyObject *format_spec,
   1413                                 Py_ssize_t start, Py_ssize_t end)
   1414 {
   1415     InternalFormatSpec format;
   1416 
   1417     assert(PyUnicode_Check(obj));
   1418 
   1419     /* check for the special case of zero length format spec, make
   1420        it equivalent to str(obj) */
   1421     if (start == end) {
   1422         if (PyUnicode_CheckExact(obj))
   1423             return _PyUnicodeWriter_WriteStr(writer, obj);
   1424         else
   1425             return format_obj(obj, writer);
   1426     }
   1427 
   1428     /* parse the format_spec */
   1429     if (!parse_internal_render_format_spec(format_spec, start, end,
   1430                                            &format, 's', '<'))
   1431         return -1;
   1432 
   1433     /* type conversion? */
   1434     switch (format.type) {
   1435     case 's':
   1436         /* no type conversion needed, already a string.  do the formatting */
   1437         return format_string_internal(obj, &format, writer);
   1438     default:
   1439         /* unknown */
   1440         unknown_presentation_type(format.type, obj->ob_type->tp_name);
   1441         return -1;
   1442     }
   1443 }
   1444 
   1445 int
   1446 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
   1447                              PyObject *obj,
   1448                              PyObject *format_spec,
   1449                              Py_ssize_t start, Py_ssize_t end)
   1450 {
   1451     PyObject *tmp = NULL, *str = NULL;
   1452     InternalFormatSpec format;
   1453     int result = -1;
   1454 
   1455     /* check for the special case of zero length format spec, make
   1456        it equivalent to str(obj) */
   1457     if (start == end) {
   1458         if (PyLong_CheckExact(obj))
   1459             return _PyLong_FormatWriter(writer, obj, 10, 0);
   1460         else
   1461             return format_obj(obj, writer);
   1462     }
   1463 
   1464     /* parse the format_spec */
   1465     if (!parse_internal_render_format_spec(format_spec, start, end,
   1466                                            &format, 'd', '>'))
   1467         goto done;
   1468 
   1469     /* type conversion? */
   1470     switch (format.type) {
   1471     case 'b':
   1472     case 'c':
   1473     case 'd':
   1474     case 'o':
   1475     case 'x':
   1476     case 'X':
   1477     case 'n':
   1478         /* no type conversion needed, already an int.  do the formatting */
   1479         result = format_long_internal(obj, &format, writer);
   1480         break;
   1481 
   1482     case 'e':
   1483     case 'E':
   1484     case 'f':
   1485     case 'F':
   1486     case 'g':
   1487     case 'G':
   1488     case '%':
   1489         /* convert to float */
   1490         tmp = PyNumber_Float(obj);
   1491         if (tmp == NULL)
   1492             goto done;
   1493         result = format_float_internal(tmp, &format, writer);
   1494         break;
   1495 
   1496     default:
   1497         /* unknown */
   1498         unknown_presentation_type(format.type, obj->ob_type->tp_name);
   1499         goto done;
   1500     }
   1501 
   1502 done:
   1503     Py_XDECREF(tmp);
   1504     Py_XDECREF(str);
   1505     return result;
   1506 }
   1507 
   1508 int
   1509 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
   1510                               PyObject *obj,
   1511                               PyObject *format_spec,
   1512                               Py_ssize_t start, Py_ssize_t end)
   1513 {
   1514     InternalFormatSpec format;
   1515 
   1516     /* check for the special case of zero length format spec, make
   1517        it equivalent to str(obj) */
   1518     if (start == end)
   1519         return format_obj(obj, writer);
   1520 
   1521     /* parse the format_spec */
   1522     if (!parse_internal_render_format_spec(format_spec, start, end,
   1523                                            &format, '\0', '>'))
   1524         return -1;
   1525 
   1526     /* type conversion? */
   1527     switch (format.type) {
   1528     case '\0': /* No format code: like 'g', but with at least one decimal. */
   1529     case 'e':
   1530     case 'E':
   1531     case 'f':
   1532     case 'F':
   1533     case 'g':
   1534     case 'G':
   1535     case 'n':
   1536     case '%':
   1537         /* no conversion, already a float.  do the formatting */
   1538         return format_float_internal(obj, &format, writer);
   1539 
   1540     default:
   1541         /* unknown */
   1542         unknown_presentation_type(format.type, obj->ob_type->tp_name);
   1543         return -1;
   1544     }
   1545 }
   1546 
   1547 int
   1548 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
   1549                                 PyObject *obj,
   1550                                 PyObject *format_spec,
   1551                                 Py_ssize_t start, Py_ssize_t end)
   1552 {
   1553     InternalFormatSpec format;
   1554 
   1555     /* check for the special case of zero length format spec, make
   1556        it equivalent to str(obj) */
   1557     if (start == end)
   1558         return format_obj(obj, writer);
   1559 
   1560     /* parse the format_spec */
   1561     if (!parse_internal_render_format_spec(format_spec, start, end,
   1562                                            &format, '\0', '>'))
   1563         return -1;
   1564 
   1565     /* type conversion? */
   1566     switch (format.type) {
   1567     case '\0': /* No format code: like 'g', but with at least one decimal. */
   1568     case 'e':
   1569     case 'E':
   1570     case 'f':
   1571     case 'F':
   1572     case 'g':
   1573     case 'G':
   1574     case 'n':
   1575         /* no conversion, already a complex.  do the formatting */
   1576         return format_complex_internal(obj, &format, writer);
   1577 
   1578     default:
   1579         /* unknown */
   1580         unknown_presentation_type(format.type, obj->ob_type->tp_name);
   1581         return -1;
   1582     }
   1583 }
   1584