Home | History | Annotate | Download | only in stringlib
      1 /*
      2     string_format.h -- implementation of string.format().
      3 
      4     It uses the Objects/stringlib conventions, so that it can be
      5     compiled for both unicode and string objects.
      6 */
      7 
      8 
/* Defines for Python 2.6 compatibility */
/* When PY_VERSION_HEX is below 0x03000000, PyLong_FromSsize_t is mapped
   onto _PyLong_FromSsize_t so that the code below (see getitem_idx) can
   use a single spelling. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
     13 
/* Defines for more efficiently reallocating the string buffer */
/* INITIAL_SIZE_INCREMENT: starting value of OutputString.size_increment,
   the slack that output_extend() adds on top of the requested size.
   SIZE_MULTIPLIER: factor applied to that slack after an extension.
   MAX_SIZE_INCREMENT: the slack is only multiplied while it is still
   below this value. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT  3200
     18 
     19 
     20 /************************************************************************/
     21 /***********   Global data structures and forward declarations  *********/
     22 /************************************************************************/
     23 
/*
   A SubString consists of the characters between two string or
   unicode pointers.

   It is a half-open range [ptr, end).  SubString_init() sets both
   members to NULL when it is given a NULL start pointer.
*/
typedef struct {
    STRINGLIB_CHAR *ptr;    /* first character of the range, or NULL */
    STRINGLIB_CHAR *end;    /* one past the last character, or NULL */
} SubString;
     32 
     33 
/* Numbering mode of the format string being processed; it is updated
   by field_name_split(). */
typedef enum {
    ANS_INIT,    /* no numerically indexed field has been seen yet */
    ANS_AUTO,    /* first numeric field had an empty name: auto-numbering */
    ANS_MANUAL   /* first numeric field had an explicit number */
} AutoNumberState;   /* Keep track if we're auto-numbering fields */
     39 
/* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;   /* current numbering mode */
    int an_field_number;        /* index handed to the next empty-named
                                   field; incremented after each use in
                                   field_name_split() */
} AutoNumber;
     45 
     46 
/* forward declaration for recursion */
/* NOTE(review): the definition lies outside the part of the file
   reviewed here; presumably it is re-entered while expanding nested
   format specs, with recursion_depth bounding the nesting -- confirm
   against the definition. */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number);
     51 
     52 
     53 
     54 /************************************************************************/
     55 /**************************  Utility  functions  ************************/
     56 /************************************************************************/
     57 
     58 static void
     59 AutoNumber_Init(AutoNumber *auto_number)
     60 {
     61     auto_number->an_state = ANS_INIT;
     62     auto_number->an_field_number = 0;
     63 }
     64 
     65 /* fill in a SubString from a pointer and length */
     66 Py_LOCAL_INLINE(void)
     67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
     68 {
     69     str->ptr = p;
     70     if (p == NULL)
     71         str->end = NULL;
     72     else
     73         str->end = str->ptr + len;
     74 }
     75 
     76 /* return a new string.  if str->ptr is NULL, return None */
     77 Py_LOCAL_INLINE(PyObject *)
     78 SubString_new_object(SubString *str)
     79 {
     80     if (str->ptr == NULL) {
     81         Py_INCREF(Py_None);
     82         return Py_None;
     83     }
     84     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
     85 }
     86 
     87 /* return a new string.  if str->ptr is NULL, return None */
     88 Py_LOCAL_INLINE(PyObject *)
     89 SubString_new_object_or_empty(SubString *str)
     90 {
     91     if (str->ptr == NULL) {
     92         return STRINGLIB_NEW(NULL, 0);
     93     }
     94     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
     95 }
     96 
     97 /* Return 1 if an error has been detected switching between automatic
     98    field numbering and manual field specification, else return 0. Set
     99    ValueError on error. */
    100 static int
    101 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
    102 {
    103     if (state == ANS_MANUAL) {
    104         if (field_name_is_empty) {
    105             PyErr_SetString(PyExc_ValueError, "cannot switch from "
    106                             "manual field specification to "
    107                             "automatic field numbering");
    108             return 1;
    109         }
    110     }
    111     else {
    112         if (!field_name_is_empty) {
    113             PyErr_SetString(PyExc_ValueError, "cannot switch from "
    114                             "automatic field numbering to "
    115                             "manual field specification");
    116             return 1;
    117         }
    118     }
    119     return 0;
    120 }
    121 
    122 
    123 /************************************************************************/
    124 /***********    Output string management functions       ****************/
    125 /************************************************************************/
    126 
/* Growable output buffer backed by a string object.  It is set up by
   output_initialize(), grown by output_extend() and appended to by
   output_data(). */
typedef struct {
    STRINGLIB_CHAR *ptr;          /* next position to write in obj's buffer */
    STRINGLIB_CHAR *end;          /* one past the end of obj's buffer */
    PyObject *obj;                /* string object holding the buffer */
    Py_ssize_t size_increment;    /* slack added by the next output_extend() */
} OutputString;
    133 
    134 /* initialize an OutputString object, reserving size characters */
    135 static int
    136 output_initialize(OutputString *output, Py_ssize_t size)
    137 {
    138     output->obj = STRINGLIB_NEW(NULL, size);
    139     if (output->obj == NULL)
    140         return 0;
    141 
    142     output->ptr = STRINGLIB_STR(output->obj);
    143     output->end = STRINGLIB_LEN(output->obj) + output->ptr;
    144     output->size_increment = INITIAL_SIZE_INCREMENT;
    145 
    146     return 1;
    147 }
    148 
    149 /*
    150     output_extend reallocates the output string buffer.
    151     It returns a status:  0 for a failed reallocation,
    152     1 for success.
    153 */
    154 
    155 static int
    156 output_extend(OutputString *output, Py_ssize_t count)
    157 {
    158     STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
    159     Py_ssize_t curlen = output->ptr - startptr;
    160     Py_ssize_t maxlen = curlen + count + output->size_increment;
    161 
    162     if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
    163         return 0;
    164     startptr = STRINGLIB_STR(output->obj);
    165     output->ptr = startptr + curlen;
    166     output->end = startptr + maxlen;
    167     if (output->size_increment < MAX_SIZE_INCREMENT)
    168         output->size_increment *= SIZE_MULTIPLIER;
    169     return 1;
    170 }
    171 
    172 /*
    173     output_data dumps characters into our output string
    174     buffer.
    175 
    176     In some cases, it has to reallocate the string.
    177 
    178     It returns a status:  0 for a failed reallocation,
    179     1 for success.
    180 */
    181 static int
    182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
    183 {
    184     if ((count > output->end - output->ptr) && !output_extend(output, count))
    185         return 0;
    186     memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
    187     output->ptr += count;
    188     return 1;
    189 }
    190 
    191 /************************************************************************/
    192 /***********  Format string parsing -- integers and identifiers *********/
    193 /************************************************************************/
    194 
    195 static Py_ssize_t
    196 get_integer(const SubString *str)
    197 {
    198     Py_ssize_t accumulator = 0;
    199     Py_ssize_t digitval;
    200     STRINGLIB_CHAR *p;
    201 
    202     /* empty string is an error */
    203     if (str->ptr >= str->end)
    204         return -1;
    205 
    206     for (p = str->ptr; p < str->end; p++) {
    207         digitval = STRINGLIB_TODECIMAL(*p);
    208         if (digitval < 0)
    209             return -1;
    210         /*
    211            Detect possible overflow before it happens:
    212 
    213               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
    214               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
    215         */
    216         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
    217             PyErr_Format(PyExc_ValueError,
    218                          "Too many decimal digits in format string");
    219             return -1;
    220         }
    221         accumulator = accumulator * 10 + digitval;
    222     }
    223     return accumulator;
    224 }
    225 
    226 /************************************************************************/
    227 /******** Functions to get field objects and specification strings ******/
    228 /************************************************************************/
    229 
    230 /* do the equivalent of obj.name */
    231 static PyObject *
    232 getattr(PyObject *obj, SubString *name)
    233 {
    234     PyObject *newobj;
    235     PyObject *str = SubString_new_object(name);
    236     if (str == NULL)
    237         return NULL;
    238     newobj = PyObject_GetAttr(obj, str);
    239     Py_DECREF(str);
    240     return newobj;
    241 }
    242 
    243 /* do the equivalent of obj[idx], where obj is a sequence */
    244 static PyObject *
    245 getitem_sequence(PyObject *obj, Py_ssize_t idx)
    246 {
    247     return PySequence_GetItem(obj, idx);
    248 }
    249 
    250 /* do the equivalent of obj[idx], where obj is not a sequence */
    251 static PyObject *
    252 getitem_idx(PyObject *obj, Py_ssize_t idx)
    253 {
    254     PyObject *newobj;
    255     PyObject *idx_obj = PyLong_FromSsize_t(idx);
    256     if (idx_obj == NULL)
    257         return NULL;
    258     newobj = PyObject_GetItem(obj, idx_obj);
    259     Py_DECREF(idx_obj);
    260     return newobj;
    261 }
    262 
    263 /* do the equivalent of obj[name] */
    264 static PyObject *
    265 getitem_str(PyObject *obj, SubString *name)
    266 {
    267     PyObject *newobj;
    268     PyObject *str = SubString_new_object(name);
    269     if (str == NULL)
    270         return NULL;
    271     newobj = PyObject_GetItem(obj, str);
    272     Py_DECREF(str);
    273     return newobj;
    274 }
    275 
/* Iteration state for walking the ".attr" and "[item]" parts of a
   field name; it is advanced by FieldNameIterator_next(). */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name; iteration is
       finished once it reaches str.end */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
    285 
    286 
    287 static int
    288 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
    289                        Py_ssize_t len)
    290 {
    291     SubString_init(&self->str, ptr, len);
    292     self->ptr = self->str.ptr;
    293     return 1;
    294 }
    295 
    296 static int
    297 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
    298 {
    299     STRINGLIB_CHAR c;
    300 
    301     name->ptr = self->ptr;
    302 
    303     /* return everything until '.' or '[' */
    304     while (self->ptr < self->str.end) {
    305         switch (c = *self->ptr++) {
    306         case '[':
    307         case '.':
    308             /* backup so that we this character will be seen next time */
    309             self->ptr--;
    310             break;
    311         default:
    312             continue;
    313         }
    314         break;
    315     }
    316     /* end of string is okay */
    317     name->end = self->ptr;
    318     return 1;
    319 }
    320 
    321 static int
    322 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
    323 {
    324     int bracket_seen = 0;
    325     STRINGLIB_CHAR c;
    326 
    327     name->ptr = self->ptr;
    328 
    329     /* return everything until ']' */
    330     while (self->ptr < self->str.end) {
    331         switch (c = *self->ptr++) {
    332         case ']':
    333             bracket_seen = 1;
    334             break;
    335         default:
    336             continue;
    337         }
    338         break;
    339     }
    340     /* make sure we ended with a ']' */
    341     if (!bracket_seen) {
    342         PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
    343         return 0;
    344     }
    345 
    346     /* end of string is okay */
    347     /* don't include the ']' */
    348     name->end = self->ptr-1;
    349     return 1;
    350 }
    351 
    352 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
    353 static int
    354 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
    355                        Py_ssize_t *name_idx, SubString *name)
    356 {
    357     /* check at end of input */
    358     if (self->ptr >= self->str.end)
    359         return 1;
    360 
    361     switch (*self->ptr++) {
    362     case '.':
    363         *is_attribute = 1;
    364         if (_FieldNameIterator_attr(self, name) == 0)
    365             return 0;
    366         *name_idx = -1;
    367         break;
    368     case '[':
    369         *is_attribute = 0;
    370         if (_FieldNameIterator_item(self, name) == 0)
    371             return 0;
    372         *name_idx = get_integer(name);
    373         if (*name_idx == -1 && PyErr_Occurred())
    374             return 0;
    375         break;
    376     default:
    377         /* Invalid character follows ']' */
    378         PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
    379                         "follow ']' in format field specifier");
    380         return 0;
    381     }
    382 
    383     /* empty string is an error */
    384     if (name->ptr == name->end) {
    385         PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
    386         return 0;
    387     }
    388 
    389     return 2;
    390 }
    391 
    392 
    393 /* input: field_name
    394    output: 'first' points to the part before the first '[' or '.'
    395            'first_idx' is -1 if 'first' is not an integer, otherwise
    396                        it's the value of first converted to an integer
    397            'rest' is an iterator to return the rest
    398 */
    399 static int
    400 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
    401                  Py_ssize_t *first_idx, FieldNameIterator *rest,
    402                  AutoNumber *auto_number)
    403 {
    404     STRINGLIB_CHAR c;
    405     STRINGLIB_CHAR *p = ptr;
    406     STRINGLIB_CHAR *end = ptr + len;
    407     int field_name_is_empty;
    408     int using_numeric_index;
    409 
    410     /* find the part up until the first '.' or '[' */
    411     while (p < end) {
    412         switch (c = *p++) {
    413         case '[':
    414         case '.':
    415             /* backup so that we this character is available to the
    416                "rest" iterator */
    417             p--;
    418             break;
    419         default:
    420             continue;
    421         }
    422         break;
    423     }
    424 
    425     /* set up the return values */
    426     SubString_init(first, ptr, p - ptr);
    427     FieldNameIterator_init(rest, p, end - p);
    428 
    429     /* see if "first" is an integer, in which case it's used as an index */
    430     *first_idx = get_integer(first);
    431     if (*first_idx == -1 && PyErr_Occurred())
    432         return 0;
    433 
    434     field_name_is_empty = first->ptr >= first->end;
    435 
    436     /* If the field name is omitted or if we have a numeric index
    437        specified, then we're doing numeric indexing into args. */
    438     using_numeric_index = field_name_is_empty || *first_idx != -1;
    439 
    440     /* We always get here exactly one time for each field we're
    441        processing. And we get here in field order (counting by left
    442        braces). So this is the perfect place to handle automatic field
    443        numbering if the field name is omitted. */
    444 
    445     /* Check if we need to do the auto-numbering. It's not needed if
    446        we're called from string.Format routines, because it's handled
    447        in that class by itself. */
    448     if (auto_number) {
    449         /* Initialize our auto numbering state if this is the first
    450            time we're either auto-numbering or manually numbering. */
    451         if (auto_number->an_state == ANS_INIT && using_numeric_index)
    452             auto_number->an_state = field_name_is_empty ?
    453                 ANS_AUTO : ANS_MANUAL;
    454 
    455         /* Make sure our state is consistent with what we're doing
    456            this time through. Only check if we're using a numeric
    457            index. */
    458         if (using_numeric_index)
    459             if (autonumber_state_error(auto_number->an_state,
    460                                        field_name_is_empty))
    461                 return 0;
    462         /* Zero length field means we want to do auto-numbering of the
    463            fields. */
    464         if (field_name_is_empty)
    465             *first_idx = (auto_number->an_field_number)++;
    466     }
    467 
    468     return 1;
    469 }
    470 
    471 
    472 /*
    473     get_field_object returns the object inside {}, before the
    474     format_spec.  It handles getindex and getattr lookups and consumes
    475     the entire input string.
    476 */
    477 static PyObject *
    478 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
    479                  AutoNumber *auto_number)
    480 {
    481     PyObject *obj = NULL;
    482     int ok;
    483     int is_attribute;
    484     SubString name;
    485     SubString first;
    486     Py_ssize_t index;
    487     FieldNameIterator rest;
    488 
    489     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
    490                           &index, &rest, auto_number)) {
    491         goto error;
    492     }
    493 
    494     if (index == -1) {
    495         /* look up in kwargs */
    496         PyObject *key = SubString_new_object(&first);
    497         if (key == NULL)
    498             goto error;
    499         if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
    500             PyErr_SetObject(PyExc_KeyError, key);
    501             Py_DECREF(key);
    502             goto error;
    503         }
    504         Py_DECREF(key);
    505         Py_INCREF(obj);
    506     }
    507     else {
    508         /* look up in args */
    509         obj = PySequence_GetItem(args, index);
    510         if (obj == NULL)
    511             goto error;
    512     }
    513 
    514     /* iterate over the rest of the field_name */
    515     while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
    516                                         &name)) == 2) {
    517         PyObject *tmp;
    518 
    519         if (is_attribute)
    520             /* getattr lookup "." */
    521             tmp = getattr(obj, &name);
    522         else
    523             /* getitem lookup "[]" */
    524             if (index == -1)
    525                 tmp = getitem_str(obj, &name);
    526             else
    527                 if (PySequence_Check(obj))
    528                     tmp = getitem_sequence(obj, index);
    529                 else
    530                     /* not a sequence */
    531                     tmp = getitem_idx(obj, index);
    532         if (tmp == NULL)
    533             goto error;
    534 
    535         /* assign to obj */
    536         Py_DECREF(obj);
    537         obj = tmp;
    538     }
    539     /* end of iterator, this is the non-error case */
    540     if (ok == 1)
    541         return obj;
    542 error:
    543     Py_XDECREF(obj);
    544     return NULL;
    545 }
    546 
    547 /************************************************************************/
    548 /*****************  Field rendering functions  **************************/
    549 /************************************************************************/
    550 
    551 /*
    552     render_field() is the main function in this section.  It takes the
    553     field object and field specification string generated by
    554     get_field_and_spec, and renders the field into the output string.
    555 
    556     render_field calls fieldobj.__format__(format_spec) method, and
    557     appends to the output.
    558 */
    559 static int
    560 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
    561 {
    562     int ok = 0;
    563     PyObject *result = NULL;
    564     PyObject *format_spec_object = NULL;
    565     PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
    566     STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
    567             format_spec->ptr : NULL;
    568     Py_ssize_t format_spec_len = format_spec->ptr ?
    569             format_spec->end - format_spec->ptr : 0;
    570 
    571     /* If we know the type exactly, skip the lookup of __format__ and just
    572        call the formatter directly. */
    573 #if STRINGLIB_IS_UNICODE
    574     if (PyUnicode_CheckExact(fieldobj))
    575         formatter = _PyUnicode_FormatAdvanced;
    576     /* Unfortunately, there's a problem with checking for int, long,
    577        and float here.  If we're being included as unicode, their
    578        formatters expect string format_spec args.  For now, just skip
    579        this optimization for unicode.  This could be fixed, but it's a
    580        hassle. */
    581 #else
    582     if (PyString_CheckExact(fieldobj))
    583         formatter = _PyBytes_FormatAdvanced;
    584     else if (PyInt_CheckExact(fieldobj))
    585         formatter =_PyInt_FormatAdvanced;
    586     else if (PyLong_CheckExact(fieldobj))
    587         formatter =_PyLong_FormatAdvanced;
    588     else if (PyFloat_CheckExact(fieldobj))
    589         formatter = _PyFloat_FormatAdvanced;
    590 #endif
    591 
    592     if (formatter) {
    593         /* we know exactly which formatter will be called when __format__ is
    594            looked up, so call it directly, instead. */
    595         result = formatter(fieldobj, format_spec_start, format_spec_len);
    596     }
    597     else {
    598         /* We need to create an object out of the pointers we have, because
    599            __format__ takes a string/unicode object for format_spec. */
    600         format_spec_object = STRINGLIB_NEW(format_spec_start,
    601                                            format_spec_len);
    602         if (format_spec_object == NULL)
    603             goto done;
    604 
    605         result = PyObject_Format(fieldobj, format_spec_object);
    606     }
    607     if (result == NULL)
    608         goto done;
    609 
    610 #if PY_VERSION_HEX >= 0x03000000
    611     assert(PyUnicode_Check(result));
    612 #else
    613     assert(PyString_Check(result) || PyUnicode_Check(result));
    614 
    615     /* Convert result to our type.  We could be str, and result could
    616        be unicode */
    617     {
    618         PyObject *tmp = STRINGLIB_TOSTR(result);
    619         if (tmp == NULL)
    620             goto done;
    621         Py_DECREF(result);
    622         result = tmp;
    623     }
    624 #endif
    625 
    626     ok = output_data(output,
    627                      STRINGLIB_STR(result), STRINGLIB_LEN(result));
    628 done:
    629     Py_XDECREF(format_spec_object);
    630     Py_XDECREF(result);
    631     return ok;
    632 }
    633 
    634 static int
    635 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
    636             STRINGLIB_CHAR *conversion)
    637 {
    638     /* Note this function works if the field name is zero length,
    639        which is good.  Zero length field names are handled later, in
    640        field_name_split. */
    641 
    642     STRINGLIB_CHAR c = 0;
    643 
    644     /* initialize these, as they may be empty */
    645     *conversion = '\0';
    646     SubString_init(format_spec, NULL, 0);
    647 
    648     /* Search for the field name.  it's terminated by the end of
    649        the string, or a ':' or '!' */
    650     field_name->ptr = str->ptr;
    651     while (str->ptr < str->end) {
    652         switch (c = *(str->ptr++)) {
    653         case ':':
    654         case '!':
    655             break;
    656         default:
    657             continue;
    658         }
    659         break;
    660     }
    661 
    662     if (c == '!' || c == ':') {
    663         /* we have a format specifier and/or a conversion */
    664         /* don't include the last character */
    665         field_name->end = str->ptr-1;
    666 
    667         /* the format specifier is the rest of the string */
    668         format_spec->ptr = str->ptr;
    669         format_spec->end = str->end;
    670 
    671         /* see if there's a conversion specifier */
    672         if (c == '!') {
    673             /* there must be another character present */
    674             if (format_spec->ptr >= format_spec->end) {
    675                 PyErr_SetString(PyExc_ValueError,
    676                                 "end of format while looking for conversion "
    677                                 "specifier");
    678                 return 0;
    679             }
    680             *conversion = *(format_spec->ptr++);
    681 
    682             /* if there is another character, it must be a colon */
    683             if (format_spec->ptr < format_spec->end) {
    684                 c = *(format_spec->ptr++);
    685                 if (c != ':') {
    686                     PyErr_SetString(PyExc_ValueError,
    687                                     "expected ':' after format specifier");
    688                     return 0;
    689                 }
    690             }
    691         }
    692     }
    693     else
    694         /* end of string, there's no format_spec or conversion */
    695         field_name->end = str->ptr;
    696 
    697     return 1;
    698 }
    699 
    700 /************************************************************************/
    701 /******* Output string allocation and escape-to-markup processing  ******/
    702 /************************************************************************/
    703 
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    /* the input not yet consumed; MarkupIterator_next() advances
       str.ptr towards str.end */
    SubString str;
} MarkupIterator;
    712 
    713 static int
    714 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
    715 {
    716     SubString_init(&self->str, ptr, len);
    717     return 1;
    718 }
    719 
    720 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
    721    string (or something to be expanded) */
    722 static int
    723 MarkupIterator_next(MarkupIterator *self, SubString *literal,
    724                     int *field_present, SubString *field_name,
    725                     SubString *format_spec, STRINGLIB_CHAR *conversion,
    726                     int *format_spec_needs_expanding)
    727 {
    728     int at_end;
    729     STRINGLIB_CHAR c = 0;
    730     STRINGLIB_CHAR *start;
    731     int count;
    732     Py_ssize_t len;
    733     int markup_follows = 0;
    734 
    735     /* initialize all of the output variables */
    736     SubString_init(literal, NULL, 0);
    737     SubString_init(field_name, NULL, 0);
    738     SubString_init(format_spec, NULL, 0);
    739     *conversion = '\0';
    740     *format_spec_needs_expanding = 0;
    741     *field_present = 0;
    742 
    743     /* No more input, end of iterator.  This is the normal exit
    744        path. */
    745     if (self->str.ptr >= self->str.end)
    746         return 1;
    747 
    748     start = self->str.ptr;
    749 
    750     /* First read any literal text. Read until the end of string, an
    751        escaped '{' or '}', or an unescaped '{'.  In order to never
    752        allocate memory and so I can just pass pointers around, if
    753        there's an escaped '{' or '}' then we'll return the literal
    754        including the brace, but no format object.  The next time
    755        through, we'll return the rest of the literal, skipping past
    756        the second consecutive brace. */
    757     while (self->str.ptr < self->str.end) {
    758         switch (c = *(self->str.ptr++)) {
    759         case '{':
    760         case '}':
    761             markup_follows = 1;
    762             break;
    763         default:
    764             continue;
    765         }
    766         break;
    767     }
    768 
    769     at_end = self->str.ptr >= self->str.end;
    770     len = self->str.ptr - start;
    771 
    772     if ((c == '}') && (at_end || (c != *self->str.ptr))) {
    773         PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
    774                         "in format string");
    775         return 0;
    776     }
    777     if (at_end && c == '{') {
    778         PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
    779                         "in format string");
    780         return 0;
    781     }
    782     if (!at_end) {
    783         if (c == *self->str.ptr) {
    784             /* escaped } or {, skip it in the input.  there is no
    785                markup object following us, just this literal text */
    786             self->str.ptr++;
    787             markup_follows = 0;
    788         }
    789         else
    790             len--;
    791     }
    792 
    793     /* record the literal text */
    794     literal->ptr = start;
    795     literal->end = start + len;
    796 
    797     if (!markup_follows)
    798         return 2;
    799 
    800     /* this is markup, find the end of the string by counting nested
    801        braces.  note that this prohibits escaped braces, so that
    802        format_specs cannot have braces in them. */
    803     *field_present = 1;
    804     count = 1;
    805 
    806     start = self->str.ptr;
    807 
    808     /* we know we can't have a zero length string, so don't worry
    809        about that case */
    810     while (self->str.ptr < self->str.end) {
    811         switch (c = *(self->str.ptr++)) {
    812         case '{':
    813             /* the format spec needs to be recursively expanded.
    814                this is an optimization, and not strictly needed */
    815             *format_spec_needs_expanding = 1;
    816             count++;
    817             break;
    818         case '}':
    819             count--;
    820             if (count <= 0) {
    821                 /* we're done.  parse and get out */
    822                 SubString s;
    823 
    824                 SubString_init(&s, start, self->str.ptr - 1 - start);
    825                 if (parse_field(&s, field_name, format_spec, conversion) == 0)
    826                     return 0;
    827 
    828                 /* success */
    829                 return 2;
    830             }
    831             break;
    832         }
    833     }
    834 
    835     /* end of string while searching for matching '}' */
    836     PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
    837     return 0;
    838 }
    839 
    840 
    841 /* do the !r or !s conversion on obj */
    842 static PyObject *
    843 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
    844 {
    845     /* XXX in pre-3.0, do we need to convert this to unicode, since it
    846        might have returned a string? */
    847     switch (conversion) {
    848     case 'r':
    849         return PyObject_Repr(obj);
    850     case 's':
    851         return STRINGLIB_TOSTR(obj);
    852     default:
    853         if (conversion > 32 && conversion < 127) {
    854                 /* It's the ASCII subrange; casting to char is safe
    855                    (assuming the execution character set is an ASCII
    856                    superset). */
    857                 PyErr_Format(PyExc_ValueError,
    858                      "Unknown conversion specifier %c",
    859                      (char)conversion);
    860         } else
    861                 PyErr_Format(PyExc_ValueError,
    862                      "Unknown conversion specifier \\x%x",
    863                      (unsigned int)conversion);
    864         return NULL;
    865     }
    866 }
    867 
    868 /* given:
    869 
    870    {field_name!conversion:format_spec}
    871 
    872    compute the result and write it to output.
    873    format_spec_needs_expanding is an optimization.  if it's false,
    874    just output the string directly, otherwise recursively expand the
    875    format_spec string.
    876 
    877    field_name is allowed to be zero length, in which case we
    878    are doing auto field numbering.
    879 */
    880 
    881 static int
    882 output_markup(SubString *field_name, SubString *format_spec,
    883               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
    884               OutputString *output, PyObject *args, PyObject *kwargs,
    885               int recursion_depth, AutoNumber *auto_number)
    886 {
    887     PyObject *tmp = NULL;
    888     PyObject *fieldobj = NULL;
    889     SubString expanded_format_spec;
    890     SubString *actual_format_spec;
    891     int result = 0;
    892 
    893     /* convert field_name to an object */
    894     fieldobj = get_field_object(field_name, args, kwargs, auto_number);
    895     if (fieldobj == NULL)
    896         goto done;
    897 
    898     if (conversion != '\0') {
    899         tmp = do_conversion(fieldobj, conversion);
    900         if (tmp == NULL)
    901             goto done;
    902 
    903         /* do the assignment, transferring ownership: fieldobj = tmp */
    904         Py_DECREF(fieldobj);
    905         fieldobj = tmp;
    906         tmp = NULL;
    907     }
    908 
    909     /* if needed, recurively compute the format_spec */
    910     if (format_spec_needs_expanding) {
    911         tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
    912                            auto_number);
    913         if (tmp == NULL)
    914             goto done;
    915 
    916         /* note that in the case we're expanding the format string,
    917            tmp must be kept around until after the call to
    918            render_field. */
    919         SubString_init(&expanded_format_spec,
    920                        STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
    921         actual_format_spec = &expanded_format_spec;
    922     }
    923     else
    924         actual_format_spec = format_spec;
    925 
    926     if (render_field(fieldobj, actual_format_spec, output) == 0)
    927         goto done;
    928 
    929     result = 1;
    930 
    931 done:
    932     Py_XDECREF(fieldobj);
    933     Py_XDECREF(tmp);
    934 
    935     return result;
    936 }
    937 
    938 /*
    939     do_markup is the top-level loop for the format() method.  It
    940     searches through the format string for escapes to markup codes, and
    941     calls other functions to move non-markup text to the output,
    942     and to perform the markup to the output.
    943 */
    944 static int
    945 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
    946           OutputString *output, int recursion_depth, AutoNumber *auto_number)
    947 {
    948     MarkupIterator iter;
    949     int format_spec_needs_expanding;
    950     int result;
    951     int field_present;
    952     SubString literal;
    953     SubString field_name;
    954     SubString format_spec;
    955     STRINGLIB_CHAR conversion;
    956 
    957     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
    958     while ((result = MarkupIterator_next(&iter, &literal, &field_present,
    959                                          &field_name, &format_spec,
    960                                          &conversion,
    961                                          &format_spec_needs_expanding)) == 2) {
    962         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
    963             return 0;
    964         if (field_present)
    965             if (!output_markup(&field_name, &format_spec,
    966                                format_spec_needs_expanding, conversion, output,
    967                                args, kwargs, recursion_depth, auto_number))
    968                 return 0;
    969     }
    970     return result;
    971 }
    972 
    973 
    974 /*
    975     build_string allocates the output string and then
    976     calls do_markup to do the heavy lifting.
    977 */
    978 static PyObject *
    979 build_string(SubString *input, PyObject *args, PyObject *kwargs,
    980              int recursion_depth, AutoNumber *auto_number)
    981 {
    982     OutputString output;
    983     PyObject *result = NULL;
    984     Py_ssize_t count;
    985 
    986     output.obj = NULL; /* needed so cleanup code always works */
    987 
    988     /* check the recursion level */
    989     if (recursion_depth <= 0) {
    990         PyErr_SetString(PyExc_ValueError,
    991                         "Max string recursion exceeded");
    992         goto done;
    993     }
    994 
    995     /* initial size is the length of the format string, plus the size
    996        increment.  seems like a reasonable default */
    997     if (!output_initialize(&output,
    998                            input->end - input->ptr +
    999                            INITIAL_SIZE_INCREMENT))
   1000         goto done;
   1001 
   1002     if (!do_markup(input, args, kwargs, &output, recursion_depth,
   1003                    auto_number)) {
   1004         goto done;
   1005     }
   1006 
   1007     count = output.ptr - STRINGLIB_STR(output.obj);
   1008     if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
   1009         goto done;
   1010     }
   1011 
   1012     /* transfer ownership to result */
   1013     result = output.obj;
   1014     output.obj = NULL;
   1015 
   1016 done:
   1017     Py_XDECREF(output.obj);
   1018     return result;
   1019 }
   1020 
   1021 /************************************************************************/
   1022 /*********** main routine ***********************************************/
   1023 /************************************************************************/
   1024 
   1025 /* this is the main entry point */
   1026 static PyObject *
   1027 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
   1028 {
   1029     SubString input;
   1030 
   1031     /* PEP 3101 says only 2 levels, so that
   1032        "{0:{1}}".format('abc', 's')            # works
   1033        "{0:{1:{2}}}".format('abc', 's', '')    # fails
   1034     */
   1035     int recursion_depth = 2;
   1036 
   1037     AutoNumber auto_number;
   1038 
   1039     AutoNumber_Init(&auto_number);
   1040     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
   1041     return build_string(&input, args, kwargs, recursion_depth, &auto_number);
   1042 }
   1043 
   1044 
   1045 
   1046 /************************************************************************/
   1047 /*********** formatteriterator ******************************************/
   1048 /************************************************************************/
   1049 
/* This is used to implement string.Formatter.vparse().  It exists so
   Formatter can share code with the built in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */

typedef struct {
    PyObject_HEAD

    /* the string being parsed; formatter_parser() stores a reference
       here and formatteriter_dealloc() releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state; initialized by formatter_parser() over the
       characters of str */
    MarkupIterator it_markup;
} formatteriterobject;
   1062 
   1063 static void
   1064 formatteriter_dealloc(formatteriterobject *it)
   1065 {
   1066     Py_XDECREF(it->str);
   1067     PyObject_FREE(it);
   1068 }
   1069 
   1070 /* returns a tuple:
   1071    (literal, field_name, format_spec, conversion)
   1072 
   1073    literal is any literal text to output.  might be zero length
   1074    field_name is the string before the ':'.  might be None
   1075    format_spec is the string after the ':'.  mibht be None
   1076    conversion is either None, or the string after the '!'
   1077 */
   1078 static PyObject *
   1079 formatteriter_next(formatteriterobject *it)
   1080 {
   1081     SubString literal;
   1082     SubString field_name;
   1083     SubString format_spec;
   1084     STRINGLIB_CHAR conversion;
   1085     int format_spec_needs_expanding;
   1086     int field_present;
   1087     int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
   1088                                      &field_name, &format_spec, &conversion,
   1089                                      &format_spec_needs_expanding);
   1090 
   1091     /* all of the SubString objects point into it->str, so no
   1092        memory management needs to be done on them */
   1093     assert(0 <= result && result <= 2);
   1094     if (result == 0 || result == 1)
   1095         /* if 0, error has already been set, if 1, iterator is empty */
   1096         return NULL;
   1097     else {
   1098         PyObject *literal_str = NULL;
   1099         PyObject *field_name_str = NULL;
   1100         PyObject *format_spec_str = NULL;
   1101         PyObject *conversion_str = NULL;
   1102         PyObject *tuple = NULL;
   1103 
   1104         literal_str = SubString_new_object(&literal);
   1105         if (literal_str == NULL)
   1106             goto done;
   1107 
   1108         field_name_str = SubString_new_object(&field_name);
   1109         if (field_name_str == NULL)
   1110             goto done;
   1111 
   1112         /* if field_name is non-zero length, return a string for
   1113            format_spec (even if zero length), else return None */
   1114         format_spec_str = (field_present ?
   1115                            SubString_new_object_or_empty :
   1116                            SubString_new_object)(&format_spec);
   1117         if (format_spec_str == NULL)
   1118             goto done;
   1119 
   1120         /* if the conversion is not specified, return a None,
   1121            otherwise create a one length string with the conversion
   1122            character */
   1123         if (conversion == '\0') {
   1124             conversion_str = Py_None;
   1125             Py_INCREF(conversion_str);
   1126         }
   1127         else
   1128             conversion_str = STRINGLIB_NEW(&conversion, 1);
   1129         if (conversion_str == NULL)
   1130             goto done;
   1131 
   1132         tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
   1133                              conversion_str);
   1134     done:
   1135         Py_XDECREF(literal_str);
   1136         Py_XDECREF(field_name_str);
   1137         Py_XDECREF(format_spec_str);
   1138         Py_XDECREF(conversion_str);
   1139         return tuple;
   1140     }
   1141 }
   1142 
/* Method table for the formatter iterator type.  It contains only the
   terminating sentinel entry, i.e. the type defines no methods here. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
   1146 
/* Type object for formatteriterobject.  The initializer is positional,
   so the order of the entries must not be changed.  The only slots
   that are filled in are tp_dealloc, tp_getattro, tp_flags, tp_iter
   (the iterator returns itself), tp_iternext and tp_methods. */
static PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    /* slot following tp_methods -- presumably tp_members; confirm
       against the PyTypeObject layout */
    0,
};
   1179 
   1180 /* unicode_formatter_parser is used to implement
   1181    string.Formatter.vformat.  it parses a string and returns tuples
   1182    describing the parsed elements.  It's a wrapper around
   1183    stringlib/string_format.h's MarkupIterator */
   1184 static PyObject *
   1185 formatter_parser(STRINGLIB_OBJECT *self)
   1186 {
   1187     formatteriterobject *it;
   1188 
   1189     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
   1190     if (it == NULL)
   1191         return NULL;
   1192 
   1193     /* take ownership, give the object to the iterator */
   1194     Py_INCREF(self);
   1195     it->str = self;
   1196 
   1197     /* initialize the contained MarkupIterator */
   1198     MarkupIterator_init(&it->it_markup,
   1199                         STRINGLIB_STR(self),
   1200                         STRINGLIB_LEN(self));
   1201 
   1202     return (PyObject *)it;
   1203 }
   1204 
   1205 
   1206 /************************************************************************/
   1207 /*********** fieldnameiterator ******************************************/
   1208 /************************************************************************/
   1209 
   1210 
/* This is used to implement string.Formatter.vparse().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator */

typedef struct {
    PyObject_HEAD

    /* the field name string; formatter_field_name_split() stores a
       reference here just to keep the field name alive, and
       fieldnameiter_dealloc() releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state; filled in by field_name_split() when the
       iterator is created */
    FieldNameIterator it_field;
} fieldnameiterobject;
   1222 
   1223 static void
   1224 fieldnameiter_dealloc(fieldnameiterobject *it)
   1225 {
   1226     Py_XDECREF(it->str);
   1227     PyObject_FREE(it);
   1228 }
   1229 
   1230 /* returns a tuple:
   1231    (is_attr, value)
   1232    is_attr is true if we used attribute syntax (e.g., '.foo')
   1233               false if we used index syntax (e.g., '[foo]')
   1234    value is an integer or string
   1235 */
   1236 static PyObject *
   1237 fieldnameiter_next(fieldnameiterobject *it)
   1238 {
   1239     int result;
   1240     int is_attr;
   1241     Py_ssize_t idx;
   1242     SubString name;
   1243 
   1244     result = FieldNameIterator_next(&it->it_field, &is_attr,
   1245                                     &idx, &name);
   1246     if (result == 0 || result == 1)
   1247         /* if 0, error has already been set, if 1, iterator is empty */
   1248         return NULL;
   1249     else {
   1250         PyObject* result = NULL;
   1251         PyObject* is_attr_obj = NULL;
   1252         PyObject* obj = NULL;
   1253 
   1254         is_attr_obj = PyBool_FromLong(is_attr);
   1255         if (is_attr_obj == NULL)
   1256             goto done;
   1257 
   1258         /* either an integer or a string */
   1259         if (idx != -1)
   1260             obj = PyLong_FromSsize_t(idx);
   1261         else
   1262             obj = SubString_new_object(&name);
   1263         if (obj == NULL)
   1264             goto done;
   1265 
   1266         /* return a tuple of values */
   1267         result = PyTuple_Pack(2, is_attr_obj, obj);
   1268 
   1269     done:
   1270         Py_XDECREF(is_attr_obj);
   1271         Py_XDECREF(obj);
   1272         return result;
   1273     }
   1274 }
   1275 
/* Method table for the field name iterator type.  It contains only the
   terminating sentinel entry, i.e. the type defines no methods here. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
   1279 
/* Type object for fieldnameiterobject.  The initializer is positional,
   so the order of the entries must not be changed.  The only slots
   that are filled in are tp_dealloc, tp_getattro, tp_flags, tp_iter
   (the iterator returns itself), tp_iternext and tp_methods. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",                /* tp_name */
    sizeof(fieldnameiterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    fieldnameiter_methods,              /* tp_methods */
    /* slot following tp_methods -- presumably tp_members; confirm
       against the PyTypeObject layout */
    0};
   1311 
   1312 /* unicode_formatter_field_name_split is used to implement
   1313    string.Formatter.vformat.  it takes an PEP 3101 "field name", and
   1314    returns a tuple of (first, rest): "first", the part before the
   1315    first '.' or '['; and "rest", an iterator for the rest of the field
   1316    name.  it's a wrapper around stringlib/string_format.h's
   1317    field_name_split.  The iterator it returns is a
   1318    FieldNameIterator */
   1319 static PyObject *
   1320 formatter_field_name_split(STRINGLIB_OBJECT *self)
   1321 {
   1322     SubString first;
   1323     Py_ssize_t first_idx;
   1324     fieldnameiterobject *it;
   1325 
   1326     PyObject *first_obj = NULL;
   1327     PyObject *result = NULL;
   1328 
   1329     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
   1330     if (it == NULL)
   1331         return NULL;
   1332 
   1333     /* take ownership, give the object to the iterator.  this is
   1334        just to keep the field_name alive */
   1335     Py_INCREF(self);
   1336     it->str = self;
   1337 
   1338     /* Pass in auto_number = NULL. We'll return an empty string for
   1339        first_obj in that case. */
   1340     if (!field_name_split(STRINGLIB_STR(self),
   1341                           STRINGLIB_LEN(self),
   1342                           &first, &first_idx, &it->it_field, NULL))
   1343         goto done;
   1344 
   1345     /* first becomes an integer, if possible; else a string */
   1346     if (first_idx != -1)
   1347         first_obj = PyLong_FromSsize_t(first_idx);
   1348     else
   1349         /* convert "first" into a string object */
   1350         first_obj = SubString_new_object(&first);
   1351     if (first_obj == NULL)
   1352         goto done;
   1353 
   1354     /* return a tuple of values */
   1355     result = PyTuple_Pack(2, first_obj, it);
   1356 
   1357 done:
   1358     Py_XDECREF(it);
   1359     Py_XDECREF(first_obj);
   1360     return result;
   1361 }
   1362