Home | History | Annotate | Download | only in stringlib
      1 /*
      2     string_format.h -- implementation of string.format().
      3 
      4     It uses the Objects/stringlib conventions, so that it can be
      5     compiled for both unicode and string objects.
      6 */
      7 
      8 
      9 /* Defines for Python 2.6 compatibility */
     10 #if PY_VERSION_HEX < 0x03000000
     11 #define PyLong_FromSsize_t _PyLong_FromSsize_t
     12 #endif
     13 
     14 /* Defines for more efficiently reallocating the string buffer */
     15 #define INITIAL_SIZE_INCREMENT 100
     16 #define SIZE_MULTIPLIER 2
     17 #define MAX_SIZE_INCREMENT  3200
     18 
     19 
     20 /************************************************************************/
     21 /***********   Global data structures and forward declarations  *********/
     22 /************************************************************************/
     23 
     24 /*
     25    A SubString consists of the characters between two string or
     26    unicode pointers.
     27 */
     28 typedef struct {
     29     STRINGLIB_CHAR *ptr;
     30     STRINGLIB_CHAR *end;
     31 } SubString;
     32 
     33 
     34 typedef enum {
     35     ANS_INIT,
     36     ANS_AUTO,
     37     ANS_MANUAL
     38 } AutoNumberState;   /* Keep track if we're auto-numbering fields */
     39 
     40 /* Keeps track of our auto-numbering state, and which number field we're on */
     41 typedef struct {
     42     AutoNumberState an_state;
     43     int an_field_number;
     44 } AutoNumber;
     45 
     46 
     47 /* forward declaration for recursion */
     48 static PyObject *
     49 build_string(SubString *input, PyObject *args, PyObject *kwargs,
     50              int recursion_depth, AutoNumber *auto_number);
     51 
     52 
     53 
     54 /************************************************************************/
     55 /**************************  Utility  functions  ************************/
     56 /************************************************************************/
     57 
     58 static void
     59 AutoNumber_Init(AutoNumber *auto_number)
     60 {
     61     auto_number->an_state = ANS_INIT;
     62     auto_number->an_field_number = 0;
     63 }
     64 
     65 /* fill in a SubString from a pointer and length */
     66 Py_LOCAL_INLINE(void)
     67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
     68 {
     69     str->ptr = p;
     70     if (p == NULL)
     71         str->end = NULL;
     72     else
     73         str->end = str->ptr + len;
     74 }
     75 
     76 /* return a new string.  if str->ptr is NULL, return None */
     77 Py_LOCAL_INLINE(PyObject *)
     78 SubString_new_object(SubString *str)
     79 {
     80     if (str->ptr == NULL) {
     81         Py_INCREF(Py_None);
     82         return Py_None;
     83     }
     84     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
     85 }
     86 
     87 /* return a new string.  if str->ptr is NULL, return None */
     88 Py_LOCAL_INLINE(PyObject *)
     89 SubString_new_object_or_empty(SubString *str)
     90 {
     91     if (str->ptr == NULL) {
     92         return STRINGLIB_NEW(NULL, 0);
     93     }
     94     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
     95 }
     96 
     97 /* Return 1 if an error has been detected switching between automatic
     98    field numbering and manual field specification, else return 0. Set
     99    ValueError on error. */
    100 static int
    101 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
    102 {
    103     if (state == ANS_MANUAL) {
    104         if (field_name_is_empty) {
    105             PyErr_SetString(PyExc_ValueError, "cannot switch from "
    106                             "manual field specification to "
    107                             "automatic field numbering");
    108             return 1;
    109         }
    110     }
    111     else {
    112         if (!field_name_is_empty) {
    113             PyErr_SetString(PyExc_ValueError, "cannot switch from "
    114                             "automatic field numbering to "
    115                             "manual field specification");
    116             return 1;
    117         }
    118     }
    119     return 0;
    120 }
    121 
    122 
    123 /************************************************************************/
    124 /***********    Output string management functions       ****************/
    125 /************************************************************************/
    126 
    127 typedef struct {
    128     STRINGLIB_CHAR *ptr;
    129     STRINGLIB_CHAR *end;
    130     PyObject *obj;
    131     Py_ssize_t size_increment;
    132 } OutputString;
    133 
    134 /* initialize an OutputString object, reserving size characters */
    135 static int
    136 output_initialize(OutputString *output, Py_ssize_t size)
    137 {
    138     output->obj = STRINGLIB_NEW(NULL, size);
    139     if (output->obj == NULL)
    140         return 0;
    141 
    142     output->ptr = STRINGLIB_STR(output->obj);
    143     output->end = STRINGLIB_LEN(output->obj) + output->ptr;
    144     output->size_increment = INITIAL_SIZE_INCREMENT;
    145 
    146     return 1;
    147 }
    148 
    149 /*
    150     output_extend reallocates the output string buffer.
    151     It returns a status:  0 for a failed reallocation,
    152     1 for success.
    153 */
    154 
    155 static int
    156 output_extend(OutputString *output, Py_ssize_t count)
    157 {
    158     STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
    159     Py_ssize_t curlen = output->ptr - startptr;
    160     Py_ssize_t maxlen = curlen + count + output->size_increment;
    161 
    162     if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
    163         return 0;
    164     startptr = STRINGLIB_STR(output->obj);
    165     output->ptr = startptr + curlen;
    166     output->end = startptr + maxlen;
    167     if (output->size_increment < MAX_SIZE_INCREMENT)
    168         output->size_increment *= SIZE_MULTIPLIER;
    169     return 1;
    170 }
    171 
    172 /*
    173     output_data dumps characters into our output string
    174     buffer.
    175 
    176     In some cases, it has to reallocate the string.
    177 
    178     It returns a status:  0 for a failed reallocation,
    179     1 for success.
    180 */
    181 static int
    182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
    183 {
    184     if ((count > output->end - output->ptr) && !output_extend(output, count))
    185         return 0;
    186     memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
    187     output->ptr += count;
    188     return 1;
    189 }
    190 
    191 /************************************************************************/
    192 /***********  Format string parsing -- integers and identifiers *********/
    193 /************************************************************************/
    194 
    195 static Py_ssize_t
    196 get_integer(const SubString *str)
    197 {
    198     Py_ssize_t accumulator = 0;
    199     Py_ssize_t digitval;
    200     Py_ssize_t oldaccumulator;
    201     STRINGLIB_CHAR *p;
    202 
    203     /* empty string is an error */
    204     if (str->ptr >= str->end)
    205         return -1;
    206 
    207     for (p = str->ptr; p < str->end; p++) {
    208         digitval = STRINGLIB_TODECIMAL(*p);
    209         if (digitval < 0)
    210             return -1;
    211         /*
    212            This trick was copied from old Unicode format code.  It's cute,
    213            but would really suck on an old machine with a slow divide
    214            implementation.  Fortunately, in the normal case we do not
    215            expect too many digits.
    216         */
    217         oldaccumulator = accumulator;
    218         accumulator *= 10;
    219         if ((accumulator+10)/10 != oldaccumulator+1) {
    220             PyErr_Format(PyExc_ValueError,
    221                          "Too many decimal digits in format string");
    222             return -1;
    223         }
    224         accumulator += digitval;
    225     }
    226     return accumulator;
    227 }
    228 
    229 /************************************************************************/
    230 /******** Functions to get field objects and specification strings ******/
    231 /************************************************************************/
    232 
    233 /* do the equivalent of obj.name */
    234 static PyObject *
    235 getattr(PyObject *obj, SubString *name)
    236 {
    237     PyObject *newobj;
    238     PyObject *str = SubString_new_object(name);
    239     if (str == NULL)
    240         return NULL;
    241     newobj = PyObject_GetAttr(obj, str);
    242     Py_DECREF(str);
    243     return newobj;
    244 }
    245 
    246 /* do the equivalent of obj[idx], where obj is a sequence */
    247 static PyObject *
    248 getitem_sequence(PyObject *obj, Py_ssize_t idx)
    249 {
    250     return PySequence_GetItem(obj, idx);
    251 }
    252 
    253 /* do the equivalent of obj[idx], where obj is not a sequence */
    254 static PyObject *
    255 getitem_idx(PyObject *obj, Py_ssize_t idx)
    256 {
    257     PyObject *newobj;
    258     PyObject *idx_obj = PyLong_FromSsize_t(idx);
    259     if (idx_obj == NULL)
    260         return NULL;
    261     newobj = PyObject_GetItem(obj, idx_obj);
    262     Py_DECREF(idx_obj);
    263     return newobj;
    264 }
    265 
    266 /* do the equivalent of obj[name] */
    267 static PyObject *
    268 getitem_str(PyObject *obj, SubString *name)
    269 {
    270     PyObject *newobj;
    271     PyObject *str = SubString_new_object(name);
    272     if (str == NULL)
    273         return NULL;
    274     newobj = PyObject_GetItem(obj, str);
    275     Py_DECREF(str);
    276     return newobj;
    277 }
    278 
    279 typedef struct {
    280     /* the entire string we're parsing.  we assume that someone else
    281        is managing its lifetime, and that it will exist for the
    282        lifetime of the iterator.  can be empty */
    283     SubString str;
    284 
    285     /* pointer to where we are inside field_name */
    286     STRINGLIB_CHAR *ptr;
    287 } FieldNameIterator;
    288 
    289 
    290 static int
    291 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
    292                        Py_ssize_t len)
    293 {
    294     SubString_init(&self->str, ptr, len);
    295     self->ptr = self->str.ptr;
    296     return 1;
    297 }
    298 
    299 static int
    300 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
    301 {
    302     STRINGLIB_CHAR c;
    303 
    304     name->ptr = self->ptr;
    305 
    306     /* return everything until '.' or '[' */
    307     while (self->ptr < self->str.end) {
    308         switch (c = *self->ptr++) {
    309         case '[':
    310         case '.':
    311             /* backup so that we this character will be seen next time */
    312             self->ptr--;
    313             break;
    314         default:
    315             continue;
    316         }
    317         break;
    318     }
    319     /* end of string is okay */
    320     name->end = self->ptr;
    321     return 1;
    322 }
    323 
    324 static int
    325 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
    326 {
    327     int bracket_seen = 0;
    328     STRINGLIB_CHAR c;
    329 
    330     name->ptr = self->ptr;
    331 
    332     /* return everything until ']' */
    333     while (self->ptr < self->str.end) {
    334         switch (c = *self->ptr++) {
    335         case ']':
    336             bracket_seen = 1;
    337             break;
    338         default:
    339             continue;
    340         }
    341         break;
    342     }
    343     /* make sure we ended with a ']' */
    344     if (!bracket_seen) {
    345         PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
    346         return 0;
    347     }
    348 
    349     /* end of string is okay */
    350     /* don't include the ']' */
    351     name->end = self->ptr-1;
    352     return 1;
    353 }
    354 
    355 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
    356 static int
    357 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
    358                        Py_ssize_t *name_idx, SubString *name)
    359 {
    360     /* check at end of input */
    361     if (self->ptr >= self->str.end)
    362         return 1;
    363 
    364     switch (*self->ptr++) {
    365     case '.':
    366         *is_attribute = 1;
    367         if (_FieldNameIterator_attr(self, name) == 0)
    368             return 0;
    369         *name_idx = -1;
    370         break;
    371     case '[':
    372         *is_attribute = 0;
    373         if (_FieldNameIterator_item(self, name) == 0)
    374             return 0;
    375         *name_idx = get_integer(name);
    376         if (*name_idx == -1 && PyErr_Occurred())
    377             return 0;
    378         break;
    379     default:
    380         /* Invalid character follows ']' */
    381         PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
    382                         "follow ']' in format field specifier");
    383         return 0;
    384     }
    385 
    386     /* empty string is an error */
    387     if (name->ptr == name->end) {
    388         PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
    389         return 0;
    390     }
    391 
    392     return 2;
    393 }
    394 
    395 
    396 /* input: field_name
    397    output: 'first' points to the part before the first '[' or '.'
    398            'first_idx' is -1 if 'first' is not an integer, otherwise
    399                        it's the value of first converted to an integer
    400            'rest' is an iterator to return the rest
    401 */
    402 static int
    403 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
    404                  Py_ssize_t *first_idx, FieldNameIterator *rest,
    405                  AutoNumber *auto_number)
    406 {
    407     STRINGLIB_CHAR c;
    408     STRINGLIB_CHAR *p = ptr;
    409     STRINGLIB_CHAR *end = ptr + len;
    410     int field_name_is_empty;
    411     int using_numeric_index;
    412 
    413     /* find the part up until the first '.' or '[' */
    414     while (p < end) {
    415         switch (c = *p++) {
    416         case '[':
    417         case '.':
    418             /* backup so that we this character is available to the
    419                "rest" iterator */
    420             p--;
    421             break;
    422         default:
    423             continue;
    424         }
    425         break;
    426     }
    427 
    428     /* set up the return values */
    429     SubString_init(first, ptr, p - ptr);
    430     FieldNameIterator_init(rest, p, end - p);
    431 
    432     /* see if "first" is an integer, in which case it's used as an index */
    433     *first_idx = get_integer(first);
    434     if (*first_idx == -1 && PyErr_Occurred())
    435         return 0;
    436 
    437     field_name_is_empty = first->ptr >= first->end;
    438 
    439     /* If the field name is omitted or if we have a numeric index
    440        specified, then we're doing numeric indexing into args. */
    441     using_numeric_index = field_name_is_empty || *first_idx != -1;
    442 
    443     /* We always get here exactly one time for each field we're
    444        processing. And we get here in field order (counting by left
    445        braces). So this is the perfect place to handle automatic field
    446        numbering if the field name is omitted. */
    447 
    448     /* Check if we need to do the auto-numbering. It's not needed if
    449        we're called from string.Format routines, because it's handled
    450        in that class by itself. */
    451     if (auto_number) {
    452         /* Initialize our auto numbering state if this is the first
    453            time we're either auto-numbering or manually numbering. */
    454         if (auto_number->an_state == ANS_INIT && using_numeric_index)
    455             auto_number->an_state = field_name_is_empty ?
    456                 ANS_AUTO : ANS_MANUAL;
    457 
    458         /* Make sure our state is consistent with what we're doing
    459            this time through. Only check if we're using a numeric
    460            index. */
    461         if (using_numeric_index)
    462             if (autonumber_state_error(auto_number->an_state,
    463                                        field_name_is_empty))
    464                 return 0;
    465         /* Zero length field means we want to do auto-numbering of the
    466            fields. */
    467         if (field_name_is_empty)
    468             *first_idx = (auto_number->an_field_number)++;
    469     }
    470 
    471     return 1;
    472 }
    473 
    474 
    475 /*
    476     get_field_object returns the object inside {}, before the
    477     format_spec.  It handles getindex and getattr lookups and consumes
    478     the entire input string.
    479 */
    480 static PyObject *
    481 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
    482                  AutoNumber *auto_number)
    483 {
    484     PyObject *obj = NULL;
    485     int ok;
    486     int is_attribute;
    487     SubString name;
    488     SubString first;
    489     Py_ssize_t index;
    490     FieldNameIterator rest;
    491 
    492     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
    493                           &index, &rest, auto_number)) {
    494         goto error;
    495     }
    496 
    497     if (index == -1) {
    498         /* look up in kwargs */
    499         PyObject *key = SubString_new_object(&first);
    500         if (key == NULL)
    501             goto error;
    502         if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
    503             PyErr_SetObject(PyExc_KeyError, key);
    504             Py_DECREF(key);
    505             goto error;
    506         }
    507         Py_DECREF(key);
    508         Py_INCREF(obj);
    509     }
    510     else {
    511         /* look up in args */
    512         obj = PySequence_GetItem(args, index);
    513         if (obj == NULL)
    514             goto error;
    515     }
    516 
    517     /* iterate over the rest of the field_name */
    518     while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
    519                                         &name)) == 2) {
    520         PyObject *tmp;
    521 
    522         if (is_attribute)
    523             /* getattr lookup "." */
    524             tmp = getattr(obj, &name);
    525         else
    526             /* getitem lookup "[]" */
    527             if (index == -1)
    528                 tmp = getitem_str(obj, &name);
    529             else
    530                 if (PySequence_Check(obj))
    531                     tmp = getitem_sequence(obj, index);
    532                 else
    533                     /* not a sequence */
    534                     tmp = getitem_idx(obj, index);
    535         if (tmp == NULL)
    536             goto error;
    537 
    538         /* assign to obj */
    539         Py_DECREF(obj);
    540         obj = tmp;
    541     }
    542     /* end of iterator, this is the non-error case */
    543     if (ok == 1)
    544         return obj;
    545 error:
    546     Py_XDECREF(obj);
    547     return NULL;
    548 }
    549 
    550 /************************************************************************/
    551 /*****************  Field rendering functions  **************************/
    552 /************************************************************************/
    553 
    554 /*
    555     render_field() is the main function in this section.  It takes the
    556     field object and field specification string generated by
    557     get_field_and_spec, and renders the field into the output string.
    558 
    559     render_field calls fieldobj.__format__(format_spec) method, and
    560     appends to the output.
    561 */
    562 static int
    563 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
    564 {
    565     int ok = 0;
    566     PyObject *result = NULL;
    567     PyObject *format_spec_object = NULL;
    568     PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
    569     STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
    570             format_spec->ptr : NULL;
    571     Py_ssize_t format_spec_len = format_spec->ptr ?
    572             format_spec->end - format_spec->ptr : 0;
    573 
    574     /* If we know the type exactly, skip the lookup of __format__ and just
    575        call the formatter directly. */
    576 #if STRINGLIB_IS_UNICODE
    577     if (PyUnicode_CheckExact(fieldobj))
    578         formatter = _PyUnicode_FormatAdvanced;
    579     /* Unfortunately, there's a problem with checking for int, long,
    580        and float here.  If we're being included as unicode, their
    581        formatters expect string format_spec args.  For now, just skip
    582        this optimization for unicode.  This could be fixed, but it's a
    583        hassle. */
    584 #else
    585     if (PyString_CheckExact(fieldobj))
    586         formatter = _PyBytes_FormatAdvanced;
    587     else if (PyInt_CheckExact(fieldobj))
    588         formatter =_PyInt_FormatAdvanced;
    589     else if (PyLong_CheckExact(fieldobj))
    590         formatter =_PyLong_FormatAdvanced;
    591     else if (PyFloat_CheckExact(fieldobj))
    592         formatter = _PyFloat_FormatAdvanced;
    593 #endif
    594 
    595     if (formatter) {
    596         /* we know exactly which formatter will be called when __format__ is
    597            looked up, so call it directly, instead. */
    598         result = formatter(fieldobj, format_spec_start, format_spec_len);
    599     }
    600     else {
    601         /* We need to create an object out of the pointers we have, because
    602            __format__ takes a string/unicode object for format_spec. */
    603         format_spec_object = STRINGLIB_NEW(format_spec_start,
    604                                            format_spec_len);
    605         if (format_spec_object == NULL)
    606             goto done;
    607 
    608         result = PyObject_Format(fieldobj, format_spec_object);
    609     }
    610     if (result == NULL)
    611         goto done;
    612 
    613 #if PY_VERSION_HEX >= 0x03000000
    614     assert(PyUnicode_Check(result));
    615 #else
    616     assert(PyString_Check(result) || PyUnicode_Check(result));
    617 
    618     /* Convert result to our type.  We could be str, and result could
    619        be unicode */
    620     {
    621         PyObject *tmp = STRINGLIB_TOSTR(result);
    622         if (tmp == NULL)
    623             goto done;
    624         Py_DECREF(result);
    625         result = tmp;
    626     }
    627 #endif
    628 
    629     ok = output_data(output,
    630                      STRINGLIB_STR(result), STRINGLIB_LEN(result));
    631 done:
    632     Py_XDECREF(format_spec_object);
    633     Py_XDECREF(result);
    634     return ok;
    635 }
    636 
    637 static int
    638 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
    639             STRINGLIB_CHAR *conversion)
    640 {
    641     /* Note this function works if the field name is zero length,
    642        which is good.  Zero length field names are handled later, in
    643        field_name_split. */
    644 
    645     STRINGLIB_CHAR c = 0;
    646 
    647     /* initialize these, as they may be empty */
    648     *conversion = '\0';
    649     SubString_init(format_spec, NULL, 0);
    650 
    651     /* Search for the field name.  it's terminated by the end of
    652        the string, or a ':' or '!' */
    653     field_name->ptr = str->ptr;
    654     while (str->ptr < str->end) {
    655         switch (c = *(str->ptr++)) {
    656         case ':':
    657         case '!':
    658             break;
    659         default:
    660             continue;
    661         }
    662         break;
    663     }
    664 
    665     if (c == '!' || c == ':') {
    666         /* we have a format specifier and/or a conversion */
    667         /* don't include the last character */
    668         field_name->end = str->ptr-1;
    669 
    670         /* the format specifier is the rest of the string */
    671         format_spec->ptr = str->ptr;
    672         format_spec->end = str->end;
    673 
    674         /* see if there's a conversion specifier */
    675         if (c == '!') {
    676             /* there must be another character present */
    677             if (format_spec->ptr >= format_spec->end) {
    678                 PyErr_SetString(PyExc_ValueError,
    679                                 "end of format while looking for conversion "
    680                                 "specifier");
    681                 return 0;
    682             }
    683             *conversion = *(format_spec->ptr++);
    684 
    685             /* if there is another character, it must be a colon */
    686             if (format_spec->ptr < format_spec->end) {
    687                 c = *(format_spec->ptr++);
    688                 if (c != ':') {
    689                     PyErr_SetString(PyExc_ValueError,
    690                                     "expected ':' after format specifier");
    691                     return 0;
    692                 }
    693             }
    694         }
    695     }
    696     else
    697         /* end of string, there's no format_spec or conversion */
    698         field_name->end = str->ptr;
    699 
    700     return 1;
    701 }
    702 
    703 /************************************************************************/
    704 /******* Output string allocation and escape-to-markup processing  ******/
    705 /************************************************************************/
    706 
    707 /* MarkupIterator breaks the string into pieces of either literal
    708    text, or things inside {} that need to be marked up.  it is
    709    designed to make it easy to wrap a Python iterator around it, for
    710    use with the Formatter class */
    711 
    712 typedef struct {
    713     SubString str;
    714 } MarkupIterator;
    715 
    716 static int
    717 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
    718 {
    719     SubString_init(&self->str, ptr, len);
    720     return 1;
    721 }
    722 
    723 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
    724    string (or something to be expanded) */
    725 static int
    726 MarkupIterator_next(MarkupIterator *self, SubString *literal,
    727                     int *field_present, SubString *field_name,
    728                     SubString *format_spec, STRINGLIB_CHAR *conversion,
    729                     int *format_spec_needs_expanding)
    730 {
    731     int at_end;
    732     STRINGLIB_CHAR c = 0;
    733     STRINGLIB_CHAR *start;
    734     int count;
    735     Py_ssize_t len;
    736     int markup_follows = 0;
    737 
    738     /* initialize all of the output variables */
    739     SubString_init(literal, NULL, 0);
    740     SubString_init(field_name, NULL, 0);
    741     SubString_init(format_spec, NULL, 0);
    742     *conversion = '\0';
    743     *format_spec_needs_expanding = 0;
    744     *field_present = 0;
    745 
    746     /* No more input, end of iterator.  This is the normal exit
    747        path. */
    748     if (self->str.ptr >= self->str.end)
    749         return 1;
    750 
    751     start = self->str.ptr;
    752 
    753     /* First read any literal text. Read until the end of string, an
    754        escaped '{' or '}', or an unescaped '{'.  In order to never
    755        allocate memory and so I can just pass pointers around, if
    756        there's an escaped '{' or '}' then we'll return the literal
    757        including the brace, but no format object.  The next time
    758        through, we'll return the rest of the literal, skipping past
    759        the second consecutive brace. */
    760     while (self->str.ptr < self->str.end) {
    761         switch (c = *(self->str.ptr++)) {
    762         case '{':
    763         case '}':
    764             markup_follows = 1;
    765             break;
    766         default:
    767             continue;
    768         }
    769         break;
    770     }
    771 
    772     at_end = self->str.ptr >= self->str.end;
    773     len = self->str.ptr - start;
    774 
    775     if ((c == '}') && (at_end || (c != *self->str.ptr))) {
    776         PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
    777                         "in format string");
    778         return 0;
    779     }
    780     if (at_end && c == '{') {
    781         PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
    782                         "in format string");
    783         return 0;
    784     }
    785     if (!at_end) {
    786         if (c == *self->str.ptr) {
    787             /* escaped } or {, skip it in the input.  there is no
    788                markup object following us, just this literal text */
    789             self->str.ptr++;
    790             markup_follows = 0;
    791         }
    792         else
    793             len--;
    794     }
    795 
    796     /* record the literal text */
    797     literal->ptr = start;
    798     literal->end = start + len;
    799 
    800     if (!markup_follows)
    801         return 2;
    802 
    803     /* this is markup, find the end of the string by counting nested
    804        braces.  note that this prohibits escaped braces, so that
    805        format_specs cannot have braces in them. */
    806     *field_present = 1;
    807     count = 1;
    808 
    809     start = self->str.ptr;
    810 
    811     /* we know we can't have a zero length string, so don't worry
    812        about that case */
    813     while (self->str.ptr < self->str.end) {
    814         switch (c = *(self->str.ptr++)) {
    815         case '{':
    816             /* the format spec needs to be recursively expanded.
    817                this is an optimization, and not strictly needed */
    818             *format_spec_needs_expanding = 1;
    819             count++;
    820             break;
    821         case '}':
    822             count--;
    823             if (count <= 0) {
    824                 /* we're done.  parse and get out */
    825                 SubString s;
    826 
    827                 SubString_init(&s, start, self->str.ptr - 1 - start);
    828                 if (parse_field(&s, field_name, format_spec, conversion) == 0)
    829                     return 0;
    830 
    831                 /* success */
    832                 return 2;
    833             }
    834             break;
    835         }
    836     }
    837 
    838     /* end of string while searching for matching '}' */
    839     PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
    840     return 0;
    841 }
    842 
    843 
    844 /* do the !r or !s conversion on obj */
    845 static PyObject *
    846 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
    847 {
    848     /* XXX in pre-3.0, do we need to convert this to unicode, since it
    849        might have returned a string? */
    850     switch (conversion) {
    851     case 'r':
    852         return PyObject_Repr(obj);
    853     case 's':
    854         return STRINGLIB_TOSTR(obj);
    855     default:
    856         if (conversion > 32 && conversion < 127) {
    857                 /* It's the ASCII subrange; casting to char is safe
    858                    (assuming the execution character set is an ASCII
    859                    superset). */
    860                 PyErr_Format(PyExc_ValueError,
    861                      "Unknown conversion specifier %c",
    862                      (char)conversion);
    863         } else
    864                 PyErr_Format(PyExc_ValueError,
    865                      "Unknown conversion specifier \\x%x",
    866                      (unsigned int)conversion);
    867         return NULL;
    868     }
    869 }
    870 
    871 /* given:
    872 
    873    {field_name!conversion:format_spec}
    874 
    875    compute the result and write it to output.
    876    format_spec_needs_expanding is an optimization.  if it's false,
    877    just output the string directly, otherwise recursively expand the
    878    format_spec string.
    879 
    880    field_name is allowed to be zero length, in which case we
    881    are doing auto field numbering.
    882 */
    883 
    884 static int
    885 output_markup(SubString *field_name, SubString *format_spec,
    886               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
    887               OutputString *output, PyObject *args, PyObject *kwargs,
    888               int recursion_depth, AutoNumber *auto_number)
    889 {
    890     PyObject *tmp = NULL;
    891     PyObject *fieldobj = NULL;
    892     SubString expanded_format_spec;
    893     SubString *actual_format_spec;
    894     int result = 0;
    895 
    896     /* convert field_name to an object */
    897     fieldobj = get_field_object(field_name, args, kwargs, auto_number);
    898     if (fieldobj == NULL)
    899         goto done;
    900 
    901     if (conversion != '\0') {
    902         tmp = do_conversion(fieldobj, conversion);
    903         if (tmp == NULL)
    904             goto done;
    905 
    906         /* do the assignment, transferring ownership: fieldobj = tmp */
    907         Py_DECREF(fieldobj);
    908         fieldobj = tmp;
    909         tmp = NULL;
    910     }
    911 
    912     /* if needed, recurively compute the format_spec */
    913     if (format_spec_needs_expanding) {
    914         tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
    915                            auto_number);
    916         if (tmp == NULL)
    917             goto done;
    918 
    919         /* note that in the case we're expanding the format string,
    920            tmp must be kept around until after the call to
    921            render_field. */
    922         SubString_init(&expanded_format_spec,
    923                        STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
    924         actual_format_spec = &expanded_format_spec;
    925     }
    926     else
    927         actual_format_spec = format_spec;
    928 
    929     if (render_field(fieldobj, actual_format_spec, output) == 0)
    930         goto done;
    931 
    932     result = 1;
    933 
    934 done:
    935     Py_XDECREF(fieldobj);
    936     Py_XDECREF(tmp);
    937 
    938     return result;
    939 }
    940 
    941 /*
    942     do_markup is the top-level loop for the format() method.  It
    943     searches through the format string for escapes to markup codes, and
    944     calls other functions to move non-markup text to the output,
    945     and to perform the markup to the output.
    946 */
    947 static int
    948 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
    949           OutputString *output, int recursion_depth, AutoNumber *auto_number)
    950 {
    951     MarkupIterator iter;
    952     int format_spec_needs_expanding;
    953     int result;
    954     int field_present;
    955     SubString literal;
    956     SubString field_name;
    957     SubString format_spec;
    958     STRINGLIB_CHAR conversion;
    959 
    960     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
    961     while ((result = MarkupIterator_next(&iter, &literal, &field_present,
    962                                          &field_name, &format_spec,
    963                                          &conversion,
    964                                          &format_spec_needs_expanding)) == 2) {
    965         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
    966             return 0;
    967         if (field_present)
    968             if (!output_markup(&field_name, &format_spec,
    969                                format_spec_needs_expanding, conversion, output,
    970                                args, kwargs, recursion_depth, auto_number))
    971                 return 0;
    972     }
    973     return result;
    974 }
    975 
    976 
    977 /*
    978     build_string allocates the output string and then
    979     calls do_markup to do the heavy lifting.
    980 */
    981 static PyObject *
    982 build_string(SubString *input, PyObject *args, PyObject *kwargs,
    983              int recursion_depth, AutoNumber *auto_number)
    984 {
    985     OutputString output;
    986     PyObject *result = NULL;
    987     Py_ssize_t count;
    988 
    989     output.obj = NULL; /* needed so cleanup code always works */
    990 
    991     /* check the recursion level */
    992     if (recursion_depth <= 0) {
    993         PyErr_SetString(PyExc_ValueError,
    994                         "Max string recursion exceeded");
    995         goto done;
    996     }
    997 
    998     /* initial size is the length of the format string, plus the size
    999        increment.  seems like a reasonable default */
   1000     if (!output_initialize(&output,
   1001                            input->end - input->ptr +
   1002                            INITIAL_SIZE_INCREMENT))
   1003         goto done;
   1004 
   1005     if (!do_markup(input, args, kwargs, &output, recursion_depth,
   1006                    auto_number)) {
   1007         goto done;
   1008     }
   1009 
   1010     count = output.ptr - STRINGLIB_STR(output.obj);
   1011     if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
   1012         goto done;
   1013     }
   1014 
   1015     /* transfer ownership to result */
   1016     result = output.obj;
   1017     output.obj = NULL;
   1018 
   1019 done:
   1020     Py_XDECREF(output.obj);
   1021     return result;
   1022 }
   1023 
   1024 /************************************************************************/
   1025 /*********** main routine ***********************************************/
   1026 /************************************************************************/
   1027 
   1028 /* this is the main entry point */
   1029 static PyObject *
   1030 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
   1031 {
   1032     SubString input;
   1033 
   1034     /* PEP 3101 says only 2 levels, so that
   1035        "{0:{1}}".format('abc', 's')            # works
   1036        "{0:{1:{2}}}".format('abc', 's', '')    # fails
   1037     */
   1038     int recursion_depth = 2;
   1039 
   1040     AutoNumber auto_number;
   1041 
   1042     AutoNumber_Init(&auto_number);
   1043     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
   1044     return build_string(&input, args, kwargs, recursion_depth, &auto_number);
   1045 }
   1046 
   1047 
   1048 
   1049 /************************************************************************/
   1050 /*********** formatteriterator ******************************************/
   1051 /************************************************************************/
   1052 
   1053 /* This is used to implement string.Formatter.vparse().  It exists so
   1054    Formatter can share code with the built in unicode.format() method.
   1055    It's really just a wrapper around MarkupIterator that is callable
   1056    from Python. */
   1057 
   1058 typedef struct {
   1059     PyObject_HEAD
   1060 
   1061     STRINGLIB_OBJECT *str;
   1062 
   1063     MarkupIterator it_markup;
   1064 } formatteriterobject;
   1065 
   1066 static void
   1067 formatteriter_dealloc(formatteriterobject *it)
   1068 {
   1069     Py_XDECREF(it->str);
   1070     PyObject_FREE(it);
   1071 }
   1072 
   1073 /* returns a tuple:
   1074    (literal, field_name, format_spec, conversion)
   1075 
   1076    literal is any literal text to output.  might be zero length
   1077    field_name is the string before the ':'.  might be None
   1078    format_spec is the string after the ':'.  mibht be None
   1079    conversion is either None, or the string after the '!'
   1080 */
   1081 static PyObject *
   1082 formatteriter_next(formatteriterobject *it)
   1083 {
   1084     SubString literal;
   1085     SubString field_name;
   1086     SubString format_spec;
   1087     STRINGLIB_CHAR conversion;
   1088     int format_spec_needs_expanding;
   1089     int field_present;
   1090     int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
   1091                                      &field_name, &format_spec, &conversion,
   1092                                      &format_spec_needs_expanding);
   1093 
   1094     /* all of the SubString objects point into it->str, so no
   1095        memory management needs to be done on them */
   1096     assert(0 <= result && result <= 2);
   1097     if (result == 0 || result == 1)
   1098         /* if 0, error has already been set, if 1, iterator is empty */
   1099         return NULL;
   1100     else {
   1101         PyObject *literal_str = NULL;
   1102         PyObject *field_name_str = NULL;
   1103         PyObject *format_spec_str = NULL;
   1104         PyObject *conversion_str = NULL;
   1105         PyObject *tuple = NULL;
   1106 
   1107         literal_str = SubString_new_object(&literal);
   1108         if (literal_str == NULL)
   1109             goto done;
   1110 
   1111         field_name_str = SubString_new_object(&field_name);
   1112         if (field_name_str == NULL)
   1113             goto done;
   1114 
   1115         /* if field_name is non-zero length, return a string for
   1116            format_spec (even if zero length), else return None */
   1117         format_spec_str = (field_present ?
   1118                            SubString_new_object_or_empty :
   1119                            SubString_new_object)(&format_spec);
   1120         if (format_spec_str == NULL)
   1121             goto done;
   1122 
   1123         /* if the conversion is not specified, return a None,
   1124            otherwise create a one length string with the conversion
   1125            character */
   1126         if (conversion == '\0') {
   1127             conversion_str = Py_None;
   1128             Py_INCREF(conversion_str);
   1129         }
   1130         else
   1131             conversion_str = STRINGLIB_NEW(&conversion, 1);
   1132         if (conversion_str == NULL)
   1133             goto done;
   1134 
   1135         tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
   1136                              conversion_str);
   1137     done:
   1138         Py_XDECREF(literal_str);
   1139         Py_XDECREF(field_name_str);
   1140         Py_XDECREF(format_spec_str);
   1141         Py_XDECREF(conversion_str);
   1142         return tuple;
   1143     }
   1144 }
   1145 
   1146 static PyMethodDef formatteriter_methods[] = {
   1147     {NULL,              NULL}           /* sentinel */
   1148 };
   1149 
   1150 static PyTypeObject PyFormatterIter_Type = {
   1151     PyVarObject_HEAD_INIT(&PyType_Type, 0)
   1152     "formatteriterator",                /* tp_name */
   1153     sizeof(formatteriterobject),        /* tp_basicsize */
   1154     0,                                  /* tp_itemsize */
   1155     /* methods */
   1156     (destructor)formatteriter_dealloc,  /* tp_dealloc */
   1157     0,                                  /* tp_print */
   1158     0,                                  /* tp_getattr */
   1159     0,                                  /* tp_setattr */
   1160     0,                                  /* tp_compare */
   1161     0,                                  /* tp_repr */
   1162     0,                                  /* tp_as_number */
   1163     0,                                  /* tp_as_sequence */
   1164     0,                                  /* tp_as_mapping */
   1165     0,                                  /* tp_hash */
   1166     0,                                  /* tp_call */
   1167     0,                                  /* tp_str */
   1168     PyObject_GenericGetAttr,            /* tp_getattro */
   1169     0,                                  /* tp_setattro */
   1170     0,                                  /* tp_as_buffer */
   1171     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
   1172     0,                                  /* tp_doc */
   1173     0,                                  /* tp_traverse */
   1174     0,                                  /* tp_clear */
   1175     0,                                  /* tp_richcompare */
   1176     0,                                  /* tp_weaklistoffset */
   1177     PyObject_SelfIter,                  /* tp_iter */
   1178     (iternextfunc)formatteriter_next,   /* tp_iternext */
   1179     formatteriter_methods,              /* tp_methods */
   1180     0,
   1181 };
   1182 
   1183 /* unicode_formatter_parser is used to implement
   1184    string.Formatter.vformat.  it parses a string and returns tuples
   1185    describing the parsed elements.  It's a wrapper around
   1186    stringlib/string_format.h's MarkupIterator */
   1187 static PyObject *
   1188 formatter_parser(STRINGLIB_OBJECT *self)
   1189 {
   1190     formatteriterobject *it;
   1191 
   1192     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
   1193     if (it == NULL)
   1194         return NULL;
   1195 
   1196     /* take ownership, give the object to the iterator */
   1197     Py_INCREF(self);
   1198     it->str = self;
   1199 
   1200     /* initialize the contained MarkupIterator */
   1201     MarkupIterator_init(&it->it_markup,
   1202                         STRINGLIB_STR(self),
   1203                         STRINGLIB_LEN(self));
   1204 
   1205     return (PyObject *)it;
   1206 }
   1207 
   1208 
   1209 /************************************************************************/
   1210 /*********** fieldnameiterator ******************************************/
   1211 /************************************************************************/
   1212 
   1213 
   1214 /* This is used to implement string.Formatter.vparse().  It parses the
   1215    field name into attribute and item values.  It's a Python-callable
   1216    wrapper around FieldNameIterator */
   1217 
   1218 typedef struct {
   1219     PyObject_HEAD
   1220 
   1221     STRINGLIB_OBJECT *str;
   1222 
   1223     FieldNameIterator it_field;
   1224 } fieldnameiterobject;
   1225 
   1226 static void
   1227 fieldnameiter_dealloc(fieldnameiterobject *it)
   1228 {
   1229     Py_XDECREF(it->str);
   1230     PyObject_FREE(it);
   1231 }
   1232 
   1233 /* returns a tuple:
   1234    (is_attr, value)
   1235    is_attr is true if we used attribute syntax (e.g., '.foo')
   1236               false if we used index syntax (e.g., '[foo]')
   1237    value is an integer or string
   1238 */
   1239 static PyObject *
   1240 fieldnameiter_next(fieldnameiterobject *it)
   1241 {
   1242     int result;
   1243     int is_attr;
   1244     Py_ssize_t idx;
   1245     SubString name;
   1246 
   1247     result = FieldNameIterator_next(&it->it_field, &is_attr,
   1248                                     &idx, &name);
   1249     if (result == 0 || result == 1)
   1250         /* if 0, error has already been set, if 1, iterator is empty */
   1251         return NULL;
   1252     else {
   1253         PyObject* result = NULL;
   1254         PyObject* is_attr_obj = NULL;
   1255         PyObject* obj = NULL;
   1256 
   1257         is_attr_obj = PyBool_FromLong(is_attr);
   1258         if (is_attr_obj == NULL)
   1259             goto done;
   1260 
   1261         /* either an integer or a string */
   1262         if (idx != -1)
   1263             obj = PyLong_FromSsize_t(idx);
   1264         else
   1265             obj = SubString_new_object(&name);
   1266         if (obj == NULL)
   1267             goto done;
   1268 
   1269         /* return a tuple of values */
   1270         result = PyTuple_Pack(2, is_attr_obj, obj);
   1271 
   1272     done:
   1273         Py_XDECREF(is_attr_obj);
   1274         Py_XDECREF(obj);
   1275         return result;
   1276     }
   1277 }
   1278 
   1279 static PyMethodDef fieldnameiter_methods[] = {
   1280     {NULL,              NULL}           /* sentinel */
   1281 };
   1282 
   1283 static PyTypeObject PyFieldNameIter_Type = {
   1284     PyVarObject_HEAD_INIT(&PyType_Type, 0)
   1285     "fieldnameiterator",                /* tp_name */
   1286     sizeof(fieldnameiterobject),        /* tp_basicsize */
   1287     0,                                  /* tp_itemsize */
   1288     /* methods */
   1289     (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
   1290     0,                                  /* tp_print */
   1291     0,                                  /* tp_getattr */
   1292     0,                                  /* tp_setattr */
   1293     0,                                  /* tp_compare */
   1294     0,                                  /* tp_repr */
   1295     0,                                  /* tp_as_number */
   1296     0,                                  /* tp_as_sequence */
   1297     0,                                  /* tp_as_mapping */
   1298     0,                                  /* tp_hash */
   1299     0,                                  /* tp_call */
   1300     0,                                  /* tp_str */
   1301     PyObject_GenericGetAttr,            /* tp_getattro */
   1302     0,                                  /* tp_setattro */
   1303     0,                                  /* tp_as_buffer */
   1304     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
   1305     0,                                  /* tp_doc */
   1306     0,                                  /* tp_traverse */
   1307     0,                                  /* tp_clear */
   1308     0,                                  /* tp_richcompare */
   1309     0,                                  /* tp_weaklistoffset */
   1310     PyObject_SelfIter,                  /* tp_iter */
   1311     (iternextfunc)fieldnameiter_next,   /* tp_iternext */
   1312     fieldnameiter_methods,              /* tp_methods */
   1313     0};
   1314 
   1315 /* unicode_formatter_field_name_split is used to implement
   1316    string.Formatter.vformat.  it takes an PEP 3101 "field name", and
   1317    returns a tuple of (first, rest): "first", the part before the
   1318    first '.' or '['; and "rest", an iterator for the rest of the field
   1319    name.  it's a wrapper around stringlib/string_format.h's
   1320    field_name_split.  The iterator it returns is a
   1321    FieldNameIterator */
   1322 static PyObject *
   1323 formatter_field_name_split(STRINGLIB_OBJECT *self)
   1324 {
   1325     SubString first;
   1326     Py_ssize_t first_idx;
   1327     fieldnameiterobject *it;
   1328 
   1329     PyObject *first_obj = NULL;
   1330     PyObject *result = NULL;
   1331 
   1332     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
   1333     if (it == NULL)
   1334         return NULL;
   1335 
   1336     /* take ownership, give the object to the iterator.  this is
   1337        just to keep the field_name alive */
   1338     Py_INCREF(self);
   1339     it->str = self;
   1340 
   1341     /* Pass in auto_number = NULL. We'll return an empty string for
   1342        first_obj in that case. */
   1343     if (!field_name_split(STRINGLIB_STR(self),
   1344                           STRINGLIB_LEN(self),
   1345                           &first, &first_idx, &it->it_field, NULL))
   1346         goto done;
   1347 
   1348     /* first becomes an integer, if possible; else a string */
   1349     if (first_idx != -1)
   1350         first_obj = PyLong_FromSsize_t(first_idx);
   1351     else
   1352         /* convert "first" into a string object */
   1353         first_obj = SubString_new_object(&first);
   1354     if (first_obj == NULL)
   1355         goto done;
   1356 
   1357     /* return a tuple of values */
   1358     result = PyTuple_Pack(2, first_obj, it);
   1359 
   1360 done:
   1361     Py_XDECREF(it);
   1362     Py_XDECREF(first_obj);
   1363     return result;
   1364 }
   1365