Home | History | Annotate | Download | only in Modules
      1 #include "Python.h"
      2 #include "structmember.h"
/* Define Py_TYPE for Python versions older than 2.6 that do not provide it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
#endif
/* For Python versions older than 2.5 without PY_SSIZE_T_MIN, make Py_ssize_t
   an int and map the Ssize_t integer converters onto the long-based ones. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif
/* Fallback: a value is finite when it is neither infinite nor NaN. */
#ifndef Py_IS_FINITE
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a parameter as intentionally unused; it expands to the GCC
   attribute under __GNUC__ and to nothing elsewhere. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif

/* Encoding used by py_scanstring when the caller passes no encoding. */
#define DEFAULT_ENCODING "utf-8"

/* Type tests: *_Check accepts subclasses (PyObject_TypeCheck), while
   *_CheckExact requires the object's type to be exactly the given type. */
#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)

/* Tentative definitions so the macros above can refer to the type objects;
   presumably initialized later in the file (not visible here). */
static PyTypeObject PyScannerType;
static PyTypeObject PyEncoderType;
     32 
/* Instance layout of the scanner type. Every field is an object reference
   that scanner_members exposes as a read-only attribute. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    PyObject *encoding;
    PyObject *strict;
    PyObject *object_hook;
    PyObject *pairs_hook;       /* exposed to Python as "object_pairs_hook" */
    PyObject *parse_float;
    PyObject *parse_int;
    PyObject *parse_constant;
} PyScannerObject;
     43 
/* Read-only attribute table for PyScannerObject. Attribute names match the
   struct fields except pairs_hook, which is published as "object_pairs_hook".
   The {NULL} entry terminates the table. */
static PyMemberDef scanner_members[] = {
    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
    {NULL}
};
     54 
/* Instance layout of the encoder type. The object-reference fields are
   exposed read-only through encoder_members; the two int fields are not
   listed in that table. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;        /* exposed to Python as "default" */
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    PyObject *sort_keys;
    PyObject *skipkeys;
    int fast_encode;            /* not exposed via encoder_members */
    int allow_nan;              /* not exposed via encoder_members */
} PyEncoderObject;
     68 
/* Read-only attribute table for PyEncoderObject. Attribute names match the
   struct fields except defaultfn, which is published as "default".
   The {NULL} entry terminates the table. */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {NULL}
};
     80 
/* Forward declarations so the definitions below can appear in any order.
   All are file-local (static) except init_json. */

/* String escaping helpers */
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
ascii_escape_str(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
void init_json(void);
/* Scanner */
static PyObject *
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
static PyObject *
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static int
scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
static void
scanner_dealloc(PyObject *self);
static int
scanner_clear(PyObject *self);
/* Encoder */
static PyObject *
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static int
encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
static void
encoder_dealloc(PyObject *self);
static int
encoder_clear(PyObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
static int
encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
static int
encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
static PyObject *
_encoded_const(PyObject *obj);
/* Error reporting and converters */
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
static int
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
    130 
    131 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
    132 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
    133 
    134 #define MIN_EXPANSION 6
    135 #ifdef Py_UNICODE_WIDE
    136 #define MAX_EXPANSION (2 * MIN_EXPANSION)
    137 #else
    138 #define MAX_EXPANSION MIN_EXPANSION
    139 #endif
    140 
    141 static int
    142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
    143 {
    144     /* PyObject to Py_ssize_t converter */
    145     *size_ptr = PyInt_AsSsize_t(o);
    146     if (*size_ptr == -1 && PyErr_Occurred())
    147         return 0;
    148     return 1;
    149 }
    150 
    151 static PyObject *
    152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
    153 {
    154     /* Py_ssize_t to PyObject converter */
    155     return PyInt_FromSsize_t(*size_ptr);
    156 }
    157 
    158 static Py_ssize_t
    159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
    160 {
    161     /* Escape unicode code point c to ASCII escape sequences
    162     in char *output. output must have at least 12 bytes unused to
    163     accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
    164     output[chars++] = '\\';
    165     switch (c) {
    166         case '\\': output[chars++] = (char)c; break;
    167         case '"': output[chars++] = (char)c; break;
    168         case '\b': output[chars++] = 'b'; break;
    169         case '\f': output[chars++] = 'f'; break;
    170         case '\n': output[chars++] = 'n'; break;
    171         case '\r': output[chars++] = 'r'; break;
    172         case '\t': output[chars++] = 't'; break;
    173         default:
    174 #ifdef Py_UNICODE_WIDE
    175             if (c >= 0x10000) {
    176                 /* UTF-16 surrogate pair */
    177                 Py_UNICODE v = c - 0x10000;
    178                 c = 0xd800 | ((v >> 10) & 0x3ff);
    179                 output[chars++] = 'u';
    180                 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
    181                 output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
    182                 output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
    183                 output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
    184                 c = 0xdc00 | (v & 0x3ff);
    185                 output[chars++] = '\\';
    186             }
    187 #endif
    188             output[chars++] = 'u';
    189             output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
    190             output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
    191             output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
    192             output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
    193     }
    194     return chars;
    195 }
    196 
    197 static PyObject *
    198 ascii_escape_unicode(PyObject *pystr)
    199 {
    200     /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
    201     Py_ssize_t i;
    202     Py_ssize_t input_chars;
    203     Py_ssize_t output_size;
    204     Py_ssize_t max_output_size;
    205     Py_ssize_t chars;
    206     PyObject *rval;
    207     char *output;
    208     Py_UNICODE *input_unicode;
    209 
    210     input_chars = PyUnicode_GET_SIZE(pystr);
    211     input_unicode = PyUnicode_AS_UNICODE(pystr);
    212 
    213     /* One char input can be up to 6 chars output, estimate 4 of these */
    214     output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
    215     max_output_size = 2 + (input_chars * MAX_EXPANSION);
    216     rval = PyString_FromStringAndSize(NULL, output_size);
    217     if (rval == NULL) {
    218         return NULL;
    219     }
    220     output = PyString_AS_STRING(rval);
    221     chars = 0;
    222     output[chars++] = '"';
    223     for (i = 0; i < input_chars; i++) {
    224         Py_UNICODE c = input_unicode[i];
    225         if (S_CHAR(c)) {
    226             output[chars++] = (char)c;
    227         }
    228         else {
    229             chars = ascii_escape_char(c, output, chars);
    230         }
    231         if (output_size - chars < (1 + MAX_EXPANSION)) {
    232             /* There's more than four, so let's resize by a lot */
    233             Py_ssize_t new_output_size = output_size * 2;
    234             /* This is an upper bound */
    235             if (new_output_size > max_output_size) {
    236                 new_output_size = max_output_size;
    237             }
    238             /* Make sure that the output size changed before resizing */
    239             if (new_output_size != output_size) {
    240                 output_size = new_output_size;
    241                 if (_PyString_Resize(&rval, output_size) == -1) {
    242                     return NULL;
    243                 }
    244                 output = PyString_AS_STRING(rval);
    245             }
    246         }
    247     }
    248     output[chars++] = '"';
    249     if (_PyString_Resize(&rval, chars) == -1) {
    250         return NULL;
    251     }
    252     return rval;
    253 }
    254 
    255 static PyObject *
    256 ascii_escape_str(PyObject *pystr)
    257 {
    258     /* Take a PyString pystr and return a new ASCII-only escaped PyString */
    259     Py_ssize_t i;
    260     Py_ssize_t input_chars;
    261     Py_ssize_t output_size;
    262     Py_ssize_t chars;
    263     PyObject *rval;
    264     char *output;
    265     char *input_str;
    266 
    267     input_chars = PyString_GET_SIZE(pystr);
    268     input_str = PyString_AS_STRING(pystr);
    269 
    270     /* Fast path for a string that's already ASCII */
    271     for (i = 0; i < input_chars; i++) {
    272         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
    273         if (!S_CHAR(c)) {
    274             /* If we have to escape something, scan the string for unicode */
    275             Py_ssize_t j;
    276             for (j = i; j < input_chars; j++) {
    277                 c = (Py_UNICODE)(unsigned char)input_str[j];
    278                 if (c > 0x7f) {
    279                     /* We hit a non-ASCII character, bail to unicode mode */
    280                     PyObject *uni;
    281                     uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
    282                     if (uni == NULL) {
    283                         return NULL;
    284                     }
    285                     rval = ascii_escape_unicode(uni);
    286                     Py_DECREF(uni);
    287                     return rval;
    288                 }
    289             }
    290             break;
    291         }
    292     }
    293 
    294     if (i == input_chars) {
    295         /* Input is already ASCII */
    296         output_size = 2 + input_chars;
    297     }
    298     else {
    299         /* One char input can be up to 6 chars output, estimate 4 of these */
    300         output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
    301     }
    302     rval = PyString_FromStringAndSize(NULL, output_size);
    303     if (rval == NULL) {
    304         return NULL;
    305     }
    306     output = PyString_AS_STRING(rval);
    307     output[0] = '"';
    308 
    309     /* We know that everything up to i is ASCII already */
    310     chars = i + 1;
    311     memcpy(&output[1], input_str, i);
    312 
    313     for (; i < input_chars; i++) {
    314         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
    315         if (S_CHAR(c)) {
    316             output[chars++] = (char)c;
    317         }
    318         else {
    319             chars = ascii_escape_char(c, output, chars);
    320         }
    321         /* An ASCII char can't possibly expand to a surrogate! */
    322         if (output_size - chars < (1 + MIN_EXPANSION)) {
    323             /* There's more than four, so let's resize by a lot */
    324             output_size *= 2;
    325             if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
    326                 output_size = 2 + (input_chars * MIN_EXPANSION);
    327             }
    328             if (_PyString_Resize(&rval, output_size) == -1) {
    329                 return NULL;
    330             }
    331             output = PyString_AS_STRING(rval);
    332         }
    333     }
    334     output[chars++] = '"';
    335     if (_PyString_Resize(&rval, chars) == -1) {
    336         return NULL;
    337     }
    338     return rval;
    339 }
    340 
    341 static void
    342 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
    343 {
    344     /* Use the Python function json.decoder.errmsg to raise a nice
    345     looking ValueError exception */
    346     static PyObject *errmsg_fn = NULL;
    347     PyObject *pymsg;
    348     if (errmsg_fn == NULL) {
    349         PyObject *decoder = PyImport_ImportModule("json.decoder");
    350         if (decoder == NULL)
    351             return;
    352         errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
    353         Py_DECREF(decoder);
    354         if (errmsg_fn == NULL)
    355             return;
    356     }
    357     pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
    358     if (pymsg) {
    359         PyErr_SetObject(PyExc_ValueError, pymsg);
    360         Py_DECREF(pymsg);
    361     }
    362 }
    363 
    364 static PyObject *
    365 join_list_unicode(PyObject *lst)
    366 {
    367     /* return u''.join(lst) */
    368     static PyObject *joinfn = NULL;
    369     if (joinfn == NULL) {
    370         PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
    371         if (ustr == NULL)
    372             return NULL;
    373 
    374         joinfn = PyObject_GetAttrString(ustr, "join");
    375         Py_DECREF(ustr);
    376         if (joinfn == NULL)
    377             return NULL;
    378     }
    379     return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
    380 }
    381 
    382 static PyObject *
    383 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
    384     /* return (rval, idx) tuple, stealing reference to rval */
    385     PyObject *tpl;
    386     PyObject *pyidx;
    387     /*
    388     steal a reference to rval, returns (rval, idx)
    389     */
    390     if (rval == NULL) {
    391         return NULL;
    392     }
    393     pyidx = PyInt_FromSsize_t(idx);
    394     if (pyidx == NULL) {
    395         Py_DECREF(rval);
    396         return NULL;
    397     }
    398     tpl = PyTuple_New(2);
    399     if (tpl == NULL) {
    400         Py_DECREF(pyidx);
    401         Py_DECREF(rval);
    402         return NULL;
    403     }
    404     PyTuple_SET_ITEM(tpl, 0, rval);
    405     PyTuple_SET_ITEM(tpl, 1, pyidx);
    406     return tpl;
    407 }
    408 
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
    /* Read the JSON string from PyString pystr.
    end is the index of the first character after the quote.
    encoding is the encoding of pystr (must be an ASCII superset)
    if strict is zero then literal control characters are allowed
    *next_end_ptr is a return-by-reference index of the character
        after the end quote; it is set to -1 on failure

    Return value is the result of joining the decoded chunks with
    join_list_unicode, or NULL with an exception set.
    NOTE(review): this comment used to promise a PyString for ASCII-only
    input; no such path exists in this function -- confirm callers do not
    rely on it.
    */
    PyObject *rval;
    Py_ssize_t len = PyString_GET_SIZE(pystr);
    /* Index of the opening quote, used in "Unterminated string" errors */
    Py_ssize_t begin = end - 1;
    Py_ssize_t next;
    char *buf = PyString_AS_STRING(pystr);
    /* Decoded pieces (literal runs and single unescaped characters) */
    PyObject *chunks = PyList_New(0);
    if (chunks == NULL) {
        goto bail;
    }
    if (end < 0 || len <= end) {
        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
        goto bail;
    }
    /* Each iteration consumes one literal run (possibly empty) followed by
    either the closing quote or one backslash escape. On entry, end is the
    index where the next literal run starts. */
    while (1) {
        /* Find the end of the string or the next escape */
        Py_UNICODE c = 0;
        PyObject *chunk = NULL;
        for (next = end; next < len; next++) {
            c = (unsigned char)buf[next];
            if (c == '"' || c == '\\') {
                break;
            }
            else if (strict && c <= 0x1f) {
                raise_errmsg("Invalid control character at", pystr, next);
                goto bail;
            }
        }
        /* The loop ran off the end of the buffer without finding a quote
        or a backslash */
        if (!(c == '"' || c == '\\')) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        /* Pick up this chunk if it's not zero length */
        if (next != end) {
            PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
            if (strchunk == NULL) {
                goto bail;
            }
            /* Decode the raw bytes of the run using the caller's encoding */
            chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
            Py_DECREF(strchunk);
            if (chunk == NULL) {
                goto bail;
            }
            if (PyList_Append(chunks, chunk)) {
                Py_DECREF(chunk);
                goto bail;
            }
            /* The list holds its own reference now */
            Py_DECREF(chunk);
        }
        /* Step over the quote or backslash */
        next++;
        if (c == '"') {
            end = next;
            break;
        }
        /* A backslash was the last character of the buffer */
        if (next == len) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        c = buf[next];
        if (c != 'u') {
            /* Non-unicode backslash escapes */
            end = next + 1;
            switch (c) {
                case '"': break;
                case '\\': break;
                case '/': break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                /* 0 marks an unrecognized escape character */
                default: c = 0;
            }
            if (c == 0) {
                /* end - 2 is the index of the backslash */
                raise_errmsg("Invalid \\escape", pystr, end - 2);
                goto bail;
            }
        }
        else {
            c = 0;
            /* Step over the 'u'; the four hex digits occupy [next, end) */
            next++;
            end = next + 4;
            /* Requires at least one character after the four digits */
            if (end >= len) {
                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
                goto bail;
            }
            /* Decode 4 hex digits */
            for (; next < end; next++) {
                Py_UNICODE digit = buf[next];
                c <<= 4;
                switch (digit) {
                    case '0': case '1': case '2': case '3': case '4':
                    case '5': case '6': case '7': case '8': case '9':
                        c |= (digit - '0'); break;
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f':
                        c |= (digit - 'a' + 10); break;
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F':
                        c |= (digit - 'A' + 10); break;
                    default:
                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                        goto bail;
                }
            }
#ifdef Py_UNICODE_WIDE
            /* Surrogate pair */
            /* Only attempted when c is a high surrogate (0xd800-0xdbff) and
            the next two characters are "\u". next may be advanced here
            even if the match fails; that is harmless because the outer
            loop restarts from end. */
            if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
                buf[next++] == '\\' &&
                buf[next++] == 'u') {
                Py_UNICODE c2 = 0;
                /* Tentatively consume the second "\uXXXX" */
                end += 6;
                /* Decode 4 hex digits */
                for (; next < end; next++) {
                    Py_UNICODE digit = buf[next];
                    c2 <<= 4;
                    switch (digit) {
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7': case '8': case '9':
                            c2 |= (digit - '0'); break;
                        case 'a': case 'b': case 'c': case 'd': case 'e':
                        case 'f':
                            c2 |= (digit - 'a' + 10); break;
                        case 'A': case 'B': case 'C': case 'D': case 'E':
                        case 'F':
                            c2 |= (digit - 'A' + 10); break;
                        default:
                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                            goto bail;
                    }
                }
                /* Combine only when the second escape is a low surrogate;
                otherwise rewind so it is scanned on the next iteration */
                if ((c2 & 0xfc00) == 0xdc00)
                    c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
                else
                    end -= 6;
            }
#endif
        }
        /* Append the single unescaped character */
        chunk = PyUnicode_FromUnicode(&c, 1);
        if (chunk == NULL) {
            goto bail;
        }
        if (PyList_Append(chunks, chunk)) {
            Py_DECREF(chunk);
            goto bail;
        }
        Py_DECREF(chunk);
    }

    rval = join_list_unicode(chunks);
    if (rval == NULL) {
        goto bail;
    }
    Py_CLEAR(chunks);
    *next_end_ptr = end;
    return rval;
bail:
    /* Error exit: report -1 as the index and release the chunk list
    (chunks may be NULL if its allocation failed) */
    *next_end_ptr = -1;
    Py_XDECREF(chunks);
    return NULL;
}
    581 
    582 
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
{
    /* Read the JSON string from PyUnicode pystr.
    end is the index of the first character after the quote.
    if strict is zero then literal control characters are allowed
    *next_end_ptr is a return-by-reference index of the character
        after the end quote; it is set to -1 on failure

    Return value is the result of joining the decoded chunks with
    join_list_unicode, or NULL with an exception set.
    */
    PyObject *rval;
    Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
    /* Index of the opening quote, used in "Unterminated string" errors */
    Py_ssize_t begin = end - 1;
    Py_ssize_t next;
    const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
    /* Decoded pieces (literal runs and single unescaped characters) */
    PyObject *chunks = PyList_New(0);
    if (chunks == NULL) {
        goto bail;
    }
    if (end < 0 || len <= end) {
        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
        goto bail;
    }
    /* Each iteration consumes one literal run (possibly empty) followed by
    either the closing quote or one backslash escape. On entry, end is the
    index where the next literal run starts. */
    while (1) {
        /* Find the end of the string or the next escape */
        Py_UNICODE c = 0;
        PyObject *chunk = NULL;
        for (next = end; next < len; next++) {
            c = buf[next];
            if (c == '"' || c == '\\') {
                break;
            }
            else if (strict && c <= 0x1f) {
                raise_errmsg("Invalid control character at", pystr, next);
                goto bail;
            }
        }
        /* The loop ran off the end of the buffer without finding a quote
        or a backslash */
        if (!(c == '"' || c == '\\')) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        /* Pick up this chunk if it's not zero length */
        if (next != end) {
            /* The input is already unicode, so the run is copied as is */
            chunk = PyUnicode_FromUnicode(&buf[end], next - end);
            if (chunk == NULL) {
                goto bail;
            }
            if (PyList_Append(chunks, chunk)) {
                Py_DECREF(chunk);
                goto bail;
            }
            /* The list holds its own reference now */
            Py_DECREF(chunk);
        }
        /* Step over the quote or backslash */
        next++;
        if (c == '"') {
            end = next;
            break;
        }
        /* A backslash was the last character of the buffer */
        if (next == len) {
            raise_errmsg("Unterminated string starting at", pystr, begin);
            goto bail;
        }
        c = buf[next];
        if (c != 'u') {
            /* Non-unicode backslash escapes */
            end = next + 1;
            switch (c) {
                case '"': break;
                case '\\': break;
                case '/': break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                /* 0 marks an unrecognized escape character */
                default: c = 0;
            }
            if (c == 0) {
                /* end - 2 is the index of the backslash */
                raise_errmsg("Invalid \\escape", pystr, end - 2);
                goto bail;
            }
        }
        else {
            c = 0;
            /* Step over the 'u'; the four hex digits occupy [next, end) */
            next++;
            end = next + 4;
            /* Requires at least one character after the four digits */
            if (end >= len) {
                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
                goto bail;
            }
            /* Decode 4 hex digits */
            for (; next < end; next++) {
                Py_UNICODE digit = buf[next];
                c <<= 4;
                switch (digit) {
                    case '0': case '1': case '2': case '3': case '4':
                    case '5': case '6': case '7': case '8': case '9':
                        c |= (digit - '0'); break;
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f':
                        c |= (digit - 'a' + 10); break;
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F':
                        c |= (digit - 'A' + 10); break;
                    default:
                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                        goto bail;
                }
            }
#ifdef Py_UNICODE_WIDE
            /* Surrogate pair */
            /* Only attempted when c is a high surrogate (0xd800-0xdbff) and
            the next two characters are "\u". next may be advanced here
            even if the match fails; that is harmless because the outer
            loop restarts from end. */
            if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
                buf[next++] == '\\' && buf[next++] == 'u') {
                Py_UNICODE c2 = 0;
                /* Tentatively consume the second "\uXXXX" */
                end += 6;
                /* Decode 4 hex digits */
                for (; next < end; next++) {
                    Py_UNICODE digit = buf[next];
                    c2 <<= 4;
                    switch (digit) {
                        case '0': case '1': case '2': case '3': case '4':
                        case '5': case '6': case '7': case '8': case '9':
                            c2 |= (digit - '0'); break;
                        case 'a': case 'b': case 'c': case 'd': case 'e':
                        case 'f':
                            c2 |= (digit - 'a' + 10); break;
                        case 'A': case 'B': case 'C': case 'D': case 'E':
                        case 'F':
                            c2 |= (digit - 'A' + 10); break;
                        default:
                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
                            goto bail;
                    }
                }
                /* Combine only when the second escape is a low surrogate;
                otherwise rewind so it is scanned on the next iteration */
                if ((c2 & 0xfc00) == 0xdc00)
                    c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
                else
                    end -= 6;
            }
#endif
        }
        /* Append the single unescaped character */
        chunk = PyUnicode_FromUnicode(&c, 1);
        if (chunk == NULL) {
            goto bail;
        }
        if (PyList_Append(chunks, chunk)) {
            Py_DECREF(chunk);
            goto bail;
        }
        Py_DECREF(chunk);
    }

    rval = join_list_unicode(chunks);
    if (rval == NULL) {
        goto bail;
    }
    Py_DECREF(chunks);
    *next_end_ptr = end;
    return rval;
bail:
    /* Error exit: report -1 as the index and release the chunk list
    (chunks may be NULL if its allocation failed) */
    *next_end_ptr = -1;
    Py_XDECREF(chunks);
    return NULL;
}
    748 
/* Docstring text for scanstring; presumably attached to py_scanstring in
   the module's method table (not visible here).
   NOTE(review): the signature line shows encoding as required, but
   py_scanstring parses it as optional ("OO&|zi") and falls back to
   DEFAULT_ENCODING. */
PyDoc_STRVAR(pydoc_scanstring,
    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
    "\n"
    "Scan the string s for a JSON string. End is the index of the\n"
    "character in s after the quote that started the JSON string.\n"
    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
    "on attempt to decode an invalid string. If strict is False then literal\n"
    "control characters are allowed in the string.\n"
    "\n"
    "Returns a tuple of the decoded string and the index of the character in s\n"
    "after the end quote."
);
    761 
    762 static PyObject *
    763 py_scanstring(PyObject* self UNUSED, PyObject *args)
    764 {
    765     PyObject *pystr;
    766     PyObject *rval;
    767     Py_ssize_t end;
    768     Py_ssize_t next_end = -1;
    769     char *encoding = NULL;
    770     int strict = 1;
    771     if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
    772         return NULL;
    773     }
    774     if (encoding == NULL) {
    775         encoding = DEFAULT_ENCODING;
    776     }
    777     if (PyString_Check(pystr)) {
    778         rval = scanstring_str(pystr, end, encoding, strict, &next_end);
    779     }
    780     else if (PyUnicode_Check(pystr)) {
    781         rval = scanstring_unicode(pystr, end, strict, &next_end);
    782     }
    783     else {
    784         PyErr_Format(PyExc_TypeError,
    785                      "first argument must be a string, not %.80s",
    786                      Py_TYPE(pystr)->tp_name);
    787         return NULL;
    788     }
    789     return _build_rval_index_tuple(rval, next_end);
    790 }
    791 
    792 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
    793     "encode_basestring_ascii(basestring) -> str\n"
    794     "\n"
    795     "Return an ASCII-only JSON representation of a Python string"
    796 );
    797 
    798 static PyObject *
    799 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
    800 {
    801     /* Return an ASCII-only JSON representation of a Python string */
    802     /* METH_O */
    803     if (PyString_Check(pystr)) {
    804         return ascii_escape_str(pystr);
    805     }
    806     else if (PyUnicode_Check(pystr)) {
    807         return ascii_escape_unicode(pystr);
    808     }
    809     else {
    810         PyErr_Format(PyExc_TypeError,
    811                      "first argument must be a string, not %.80s",
    812                      Py_TYPE(pystr)->tp_name);
    813         return NULL;
    814     }
    815 }
    816 
    817 static void
    818 scanner_dealloc(PyObject *self)
    819 {
    820     /* Deallocate scanner object */
    821     scanner_clear(self);
    822     Py_TYPE(self)->tp_free(self);
    823 }
    824 
    825 static int
    826 scanner_traverse(PyObject *self, visitproc visit, void *arg)
    827 {
    828     PyScannerObject *s;
    829     assert(PyScanner_Check(self));
    830     s = (PyScannerObject *)self;
    831     Py_VISIT(s->encoding);
    832     Py_VISIT(s->strict);
    833     Py_VISIT(s->object_hook);
    834     Py_VISIT(s->pairs_hook);
    835     Py_VISIT(s->parse_float);
    836     Py_VISIT(s->parse_int);
    837     Py_VISIT(s->parse_constant);
    838     return 0;
    839 }
    840 
    841 static int
    842 scanner_clear(PyObject *self)
    843 {
    844     PyScannerObject *s;
    845     assert(PyScanner_Check(self));
    846     s = (PyScannerObject *)self;
    847     Py_CLEAR(s->encoding);
    848     Py_CLEAR(s->strict);
    849     Py_CLEAR(s->object_hook);
    850     Py_CLEAR(s->pairs_hook);
    851     Py_CLEAR(s->parse_float);
    852     Py_CLEAR(s->parse_int);
    853     Py_CLEAR(s->parse_constant);
    854     return 0;
    855 }
    856 
    857 static PyObject *
    858 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
    859     /* Read a JSON object from PyString pystr.
    860     idx is the index of the first character after the opening curly brace.
    861     *next_idx_ptr is a return-by-reference index to the first character after
    862         the closing curly brace.
    863 
    864     Returns a new PyObject (usually a dict, but object_hook can change that)
    865     */
    866     char *str = PyString_AS_STRING(pystr);
    867     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
    868     PyObject *rval;
    869     PyObject *pairs;
    870     PyObject *item;
    871     PyObject *key = NULL;
    872     PyObject *val = NULL;
    873     char *encoding = PyString_AS_STRING(s->encoding);
    874     int strict = PyObject_IsTrue(s->strict);
    875     Py_ssize_t next_idx;
    876 
    877     pairs = PyList_New(0);
    878     if (pairs == NULL)
    879         return NULL;
    880 
    881     /* skip whitespace after { */
    882     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
    883 
    884     /* only loop if the object is non-empty */
    885     if (idx <= end_idx && str[idx] != '}') {
    886         while (idx <= end_idx) {
    887             /* read key */
    888             if (str[idx] != '"') {
    889                 raise_errmsg("Expecting property name", pystr, idx);
    890                 goto bail;
    891             }
    892             key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
    893             if (key == NULL)
    894                 goto bail;
    895             idx = next_idx;
    896 
    897             /* skip whitespace between key and : delimiter, read :, skip whitespace */
    898             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
    899             if (idx > end_idx || str[idx] != ':') {
    900                 raise_errmsg("Expecting : delimiter", pystr, idx);
    901                 goto bail;
    902             }
    903             idx++;
    904             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
    905 
    906             /* read any JSON data type */
    907             val = scan_once_str(s, pystr, idx, &next_idx);
    908             if (val == NULL)
    909                 goto bail;
    910 
    911             item = PyTuple_Pack(2, key, val);
    912             if (item == NULL)
    913                 goto bail;
    914             Py_CLEAR(key);
    915             Py_CLEAR(val);
    916             if (PyList_Append(pairs, item) == -1) {
    917                 Py_DECREF(item);
    918                 goto bail;
    919             }
    920             Py_DECREF(item);
    921             idx = next_idx;
    922 
    923             /* skip whitespace before } or , */
    924             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
    925 
    926             /* bail if the object is closed or we didn't get the , delimiter */
    927             if (idx > end_idx) break;
    928             if (str[idx] == '}') {
    929                 break;
    930             }
    931             else if (str[idx] != ',') {
    932                 raise_errmsg("Expecting , delimiter", pystr, idx);
    933                 goto bail;
    934             }
    935             idx++;
    936 
    937             /* skip whitespace after , delimiter */
    938             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
    939         }
    940     }
    941     /* verify that idx < end_idx, str[idx] should be '}' */
    942     if (idx > end_idx || str[idx] != '}') {
    943         raise_errmsg("Expecting object", pystr, end_idx);
    944         goto bail;
    945     }
    946 
    947     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
    948     if (s->pairs_hook != Py_None) {
    949         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
    950         if (val == NULL)
    951             goto bail;
    952         Py_DECREF(pairs);
    953         *next_idx_ptr = idx + 1;
    954         return val;
    955     }
    956 
    957     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
    958                                          pairs, NULL);
    959     if (rval == NULL)
    960         goto bail;
    961     Py_CLEAR(pairs);
    962 
    963     /* if object_hook is not None: rval = object_hook(rval) */
    964     if (s->object_hook != Py_None) {
    965         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
    966         if (val == NULL)
    967             goto bail;
    968         Py_DECREF(rval);
    969         rval = val;
    970         val = NULL;
    971     }
    972     *next_idx_ptr = idx + 1;
    973     return rval;
    974 bail:
    975     Py_XDECREF(key);
    976     Py_XDECREF(val);
    977     Py_XDECREF(pairs);
    978     return NULL;
    979 }
    980 
    981 static PyObject *
    982 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
    983     /* Read a JSON object from PyUnicode pystr.
    984     idx is the index of the first character after the opening curly brace.
    985     *next_idx_ptr is a return-by-reference index to the first character after
    986         the closing curly brace.
    987 
    988     Returns a new PyObject (usually a dict, but object_hook can change that)
    989     */
    990     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
    991     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
    992     PyObject *rval;
    993     PyObject *pairs;
    994     PyObject *item;
    995     PyObject *key = NULL;
    996     PyObject *val = NULL;
    997     int strict = PyObject_IsTrue(s->strict);
    998     Py_ssize_t next_idx;
    999 
   1000     pairs = PyList_New(0);
   1001     if (pairs == NULL)
   1002         return NULL;
   1003 
   1004     /* skip whitespace after { */
   1005     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1006 
   1007     /* only loop if the object is non-empty */
   1008     if (idx <= end_idx && str[idx] != '}') {
   1009         while (idx <= end_idx) {
   1010             /* read key */
   1011             if (str[idx] != '"') {
   1012                 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
   1013                 goto bail;
   1014             }
   1015             key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
   1016             if (key == NULL)
   1017                 goto bail;
   1018             idx = next_idx;
   1019 
   1020             /* skip whitespace between key and : delimiter, read :, skip whitespace */
   1021             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1022             if (idx > end_idx || str[idx] != ':') {
   1023                 raise_errmsg("Expecting ':' delimiter", pystr, idx);
   1024                 goto bail;
   1025             }
   1026             idx++;
   1027             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1028 
   1029             /* read any JSON term */
   1030             val = scan_once_unicode(s, pystr, idx, &next_idx);
   1031             if (val == NULL)
   1032                 goto bail;
   1033 
   1034             item = PyTuple_Pack(2, key, val);
   1035             if (item == NULL)
   1036                 goto bail;
   1037             Py_CLEAR(key);
   1038             Py_CLEAR(val);
   1039             if (PyList_Append(pairs, item) == -1) {
   1040                 Py_DECREF(item);
   1041                 goto bail;
   1042             }
   1043             Py_DECREF(item);
   1044             idx = next_idx;
   1045 
   1046             /* skip whitespace before } or , */
   1047             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1048 
   1049             /* bail if the object is closed or we didn't get the , delimiter */
   1050             if (idx > end_idx) break;
   1051             if (str[idx] == '}') {
   1052                 break;
   1053             }
   1054             else if (str[idx] != ',') {
   1055                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
   1056                 goto bail;
   1057             }
   1058             idx++;
   1059 
   1060             /* skip whitespace after , delimiter */
   1061             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1062         }
   1063     }
   1064 
   1065     /* verify that idx < end_idx, str[idx] should be '}' */
   1066     if (idx > end_idx || str[idx] != '}') {
   1067         raise_errmsg("Expecting object", pystr, end_idx);
   1068         goto bail;
   1069     }
   1070 
   1071     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
   1072     if (s->pairs_hook != Py_None) {
   1073         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
   1074         if (val == NULL)
   1075             goto bail;
   1076         Py_DECREF(pairs);
   1077         *next_idx_ptr = idx + 1;
   1078         return val;
   1079     }
   1080 
   1081     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
   1082                                          pairs, NULL);
   1083     if (rval == NULL)
   1084         goto bail;
   1085     Py_CLEAR(pairs);
   1086 
   1087     /* if object_hook is not None: rval = object_hook(rval) */
   1088     if (s->object_hook != Py_None) {
   1089         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
   1090         if (val == NULL)
   1091             goto bail;
   1092         Py_DECREF(rval);
   1093         rval = val;
   1094         val = NULL;
   1095     }
   1096     *next_idx_ptr = idx + 1;
   1097     return rval;
   1098 bail:
   1099     Py_XDECREF(key);
   1100     Py_XDECREF(val);
   1101     Py_XDECREF(pairs);
   1102     return NULL;
   1103 }
   1104 
   1105 static PyObject *
   1106 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
   1107     /* Read a JSON array from PyString pystr.
   1108     idx is the index of the first character after the opening brace.
   1109     *next_idx_ptr is a return-by-reference index to the first character after
   1110         the closing brace.
   1111 
   1112     Returns a new PyList
   1113     */
   1114     char *str = PyString_AS_STRING(pystr);
   1115     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
   1116     PyObject *val = NULL;
   1117     PyObject *rval = PyList_New(0);
   1118     Py_ssize_t next_idx;
   1119     if (rval == NULL)
   1120         return NULL;
   1121 
   1122     /* skip whitespace after [ */
   1123     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1124 
   1125     /* only loop if the array is non-empty */
   1126     if (idx <= end_idx && str[idx] != ']') {
   1127         while (idx <= end_idx) {
   1128 
   1129             /* read any JSON term and de-tuplefy the (rval, idx) */
   1130             val = scan_once_str(s, pystr, idx, &next_idx);
   1131             if (val == NULL)
   1132                 goto bail;
   1133 
   1134             if (PyList_Append(rval, val) == -1)
   1135                 goto bail;
   1136 
   1137             Py_CLEAR(val);
   1138             idx = next_idx;
   1139 
   1140             /* skip whitespace between term and , */
   1141             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1142 
   1143             /* bail if the array is closed or we didn't get the , delimiter */
   1144             if (idx > end_idx) break;
   1145             if (str[idx] == ']') {
   1146                 break;
   1147             }
   1148             else if (str[idx] != ',') {
   1149                 raise_errmsg("Expecting , delimiter", pystr, idx);
   1150                 goto bail;
   1151             }
   1152             idx++;
   1153 
   1154             /* skip whitespace after , */
   1155             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1156         }
   1157     }
   1158 
   1159     /* verify that idx < end_idx, str[idx] should be ']' */
   1160     if (idx > end_idx || str[idx] != ']') {
   1161         raise_errmsg("Expecting object", pystr, end_idx);
   1162         goto bail;
   1163     }
   1164     *next_idx_ptr = idx + 1;
   1165     return rval;
   1166 bail:
   1167     Py_XDECREF(val);
   1168     Py_DECREF(rval);
   1169     return NULL;
   1170 }
   1171 
   1172 static PyObject *
   1173 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
   1174     /* Read a JSON array from PyString pystr.
   1175     idx is the index of the first character after the opening brace.
   1176     *next_idx_ptr is a return-by-reference index to the first character after
   1177         the closing brace.
   1178 
   1179     Returns a new PyList
   1180     */
   1181     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
   1182     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
   1183     PyObject *val = NULL;
   1184     PyObject *rval = PyList_New(0);
   1185     Py_ssize_t next_idx;
   1186     if (rval == NULL)
   1187         return NULL;
   1188 
   1189     /* skip whitespace after [ */
   1190     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1191 
   1192     /* only loop if the array is non-empty */
   1193     if (idx <= end_idx && str[idx] != ']') {
   1194         while (idx <= end_idx) {
   1195 
   1196             /* read any JSON term  */
   1197             val = scan_once_unicode(s, pystr, idx, &next_idx);
   1198             if (val == NULL)
   1199                 goto bail;
   1200 
   1201             if (PyList_Append(rval, val) == -1)
   1202                 goto bail;
   1203 
   1204             Py_CLEAR(val);
   1205             idx = next_idx;
   1206 
   1207             /* skip whitespace between term and , */
   1208             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1209 
   1210             /* bail if the array is closed or we didn't get the , delimiter */
   1211             if (idx > end_idx) break;
   1212             if (str[idx] == ']') {
   1213                 break;
   1214             }
   1215             else if (str[idx] != ',') {
   1216                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
   1217                 goto bail;
   1218             }
   1219             idx++;
   1220 
   1221             /* skip whitespace after , */
   1222             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
   1223         }
   1224     }
   1225 
   1226     /* verify that idx < end_idx, str[idx] should be ']' */
   1227     if (idx > end_idx || str[idx] != ']') {
   1228         raise_errmsg("Expecting object", pystr, end_idx);
   1229         goto bail;
   1230     }
   1231     *next_idx_ptr = idx + 1;
   1232     return rval;
   1233 bail:
   1234     Py_XDECREF(val);
   1235     Py_DECREF(rval);
   1236     return NULL;
   1237 }
   1238 
   1239 static PyObject *
   1240 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
   1241     /* Read a JSON constant from PyString pystr.
   1242     constant is the constant string that was found
   1243         ("NaN", "Infinity", "-Infinity").
   1244     idx is the index of the first character of the constant
   1245     *next_idx_ptr is a return-by-reference index to the first character after
   1246         the constant.
   1247 
   1248     Returns the result of parse_constant
   1249     */
   1250     PyObject *cstr;
   1251     PyObject *rval;
   1252     /* constant is "NaN", "Infinity", or "-Infinity" */
   1253     cstr = PyString_InternFromString(constant);
   1254     if (cstr == NULL)
   1255         return NULL;
   1256 
   1257     /* rval = parse_constant(constant) */
   1258     rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
   1259     idx += PyString_GET_SIZE(cstr);
   1260     Py_DECREF(cstr);
   1261     *next_idx_ptr = idx;
   1262     return rval;
   1263 }
   1264 
   1265 static PyObject *
   1266 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
   1267     /* Read a JSON number from PyString pystr.
   1268     idx is the index of the first character of the number
   1269     *next_idx_ptr is a return-by-reference index to the first character after
   1270         the number.
   1271 
   1272     Returns a new PyObject representation of that number:
   1273         PyInt, PyLong, or PyFloat.
   1274         May return other types if parse_int or parse_float are set
   1275     */
   1276     char *str = PyString_AS_STRING(pystr);
   1277     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
   1278     Py_ssize_t idx = start;
   1279     int is_float = 0;
   1280     PyObject *rval;
   1281     PyObject *numstr;
   1282 
   1283     /* read a sign if it's there, make sure it's not the end of the string */
   1284     if (str[idx] == '-') {
   1285         idx++;
   1286         if (idx > end_idx) {
   1287             PyErr_SetNone(PyExc_StopIteration);
   1288             return NULL;
   1289         }
   1290     }
   1291 
   1292     /* read as many integer digits as we find as long as it doesn't start with 0 */
   1293     if (str[idx] >= '1' && str[idx] <= '9') {
   1294         idx++;
   1295         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
   1296     }
   1297     /* if it starts with 0 we only expect one integer digit */
   1298     else if (str[idx] == '0') {
   1299         idx++;
   1300     }
   1301     /* no integer digits, error */
   1302     else {
   1303         PyErr_SetNone(PyExc_StopIteration);
   1304         return NULL;
   1305     }
   1306 
   1307     /* if the next char is '.' followed by a digit then read all float digits */
   1308     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
   1309         is_float = 1;
   1310         idx += 2;
   1311         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
   1312     }
   1313 
   1314     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
   1315     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
   1316 
   1317         /* save the index of the 'e' or 'E' just in case we need to backtrack */
   1318         Py_ssize_t e_start = idx;
   1319         idx++;
   1320 
   1321         /* read an exponent sign if present */
   1322         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
   1323 
   1324         /* read all digits */
   1325         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
   1326 
   1327         /* if we got a digit, then parse as float. if not, backtrack */
   1328         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
   1329             is_float = 1;
   1330         }
   1331         else {
   1332             idx = e_start;
   1333         }
   1334     }
   1335 
   1336     /* copy the section we determined to be a number */
   1337     numstr = PyString_FromStringAndSize(&str[start], idx - start);
   1338     if (numstr == NULL)
   1339         return NULL;
   1340     if (is_float) {
   1341         /* parse as a float using a fast path if available, otherwise call user defined method */
   1342         if (s->parse_float != (PyObject *)&PyFloat_Type) {
   1343             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
   1344         }
   1345         else {
   1346             double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
   1347                                              NULL, NULL);
   1348             if (d == -1.0 && PyErr_Occurred())
   1349                 return NULL;
   1350             rval = PyFloat_FromDouble(d);
   1351         }
   1352     }
   1353     else {
   1354         /* parse as an int using a fast path if available, otherwise call user defined method */
   1355         if (s->parse_int != (PyObject *)&PyInt_Type) {
   1356             rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
   1357         }
   1358         else {
   1359             rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
   1360         }
   1361     }
   1362     Py_DECREF(numstr);
   1363     *next_idx_ptr = idx;
   1364     return rval;
   1365 }
   1366 
   1367 static PyObject *
   1368 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
   1369     /* Read a JSON number from PyUnicode pystr.
   1370     idx is the index of the first character of the number
   1371     *next_idx_ptr is a return-by-reference index to the first character after
   1372         the number.
   1373 
   1374     Returns a new PyObject representation of that number:
   1375         PyInt, PyLong, or PyFloat.
   1376         May return other types if parse_int or parse_float are set
   1377     */
   1378     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
   1379     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
   1380     Py_ssize_t idx = start;
   1381     int is_float = 0;
   1382     PyObject *rval;
   1383     PyObject *numstr;
   1384 
   1385     /* read a sign if it's there, make sure it's not the end of the string */
   1386     if (str[idx] == '-') {
   1387         idx++;
   1388         if (idx > end_idx) {
   1389             PyErr_SetNone(PyExc_StopIteration);
   1390             return NULL;
   1391         }
   1392     }
   1393 
   1394     /* read as many integer digits as we find as long as it doesn't start with 0 */
   1395     if (str[idx] >= '1' && str[idx] <= '9') {
   1396         idx++;
   1397         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
   1398     }
   1399     /* if it starts with 0 we only expect one integer digit */
   1400     else if (str[idx] == '0') {
   1401         idx++;
   1402     }
   1403     /* no integer digits, error */
   1404     else {
   1405         PyErr_SetNone(PyExc_StopIteration);
   1406         return NULL;
   1407     }
   1408 
   1409     /* if the next char is '.' followed by a digit then read all float digits */
   1410     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
   1411         is_float = 1;
   1412         idx += 2;
   1413         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
   1414     }
   1415 
   1416     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
   1417     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
   1418         Py_ssize_t e_start = idx;
   1419         idx++;
   1420 
   1421         /* read an exponent sign if present */
   1422         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
   1423 
   1424         /* read all digits */
   1425         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
   1426 
   1427         /* if we got a digit, then parse as float. if not, backtrack */
   1428         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
   1429             is_float = 1;
   1430         }
   1431         else {
   1432             idx = e_start;
   1433         }
   1434     }
   1435 
   1436     /* copy the section we determined to be a number */
   1437     numstr = PyUnicode_FromUnicode(&str[start], idx - start);
   1438     if (numstr == NULL)
   1439         return NULL;
   1440     if (is_float) {
   1441         /* parse as a float using a fast path if available, otherwise call user defined method */
   1442         if (s->parse_float != (PyObject *)&PyFloat_Type) {
   1443             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
   1444         }
   1445         else {
   1446             rval = PyFloat_FromString(numstr, NULL);
   1447         }
   1448     }
   1449     else {
   1450         /* no fast path for unicode -> int, just call */
   1451         rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
   1452     }
   1453     Py_DECREF(numstr);
   1454     *next_idx_ptr = idx;
   1455     return rval;
   1456 }
   1457 
   1458 static PyObject *
   1459 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
   1460 {
   1461     /* Read one JSON term (of any kind) from PyString pystr.
   1462     idx is the index of the first character of the term
   1463     *next_idx_ptr is a return-by-reference index to the first character after
   1464         the number.
   1465 
   1466     Returns a new PyObject representation of the term.
   1467     */
   1468     PyObject *res;
   1469     char *str = PyString_AS_STRING(pystr);
   1470     Py_ssize_t length = PyString_GET_SIZE(pystr);
   1471     if (idx < 0) {
   1472         PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
   1473         return NULL;
   1474     }
   1475     if (idx >= length) {
   1476         PyErr_SetNone(PyExc_StopIteration);
   1477         return NULL;
   1478     }
   1479     switch (str[idx]) {
   1480         case '"':
   1481             /* string */
   1482             return scanstring_str(pystr, idx + 1,
   1483                 PyString_AS_STRING(s->encoding),
   1484                 PyObject_IsTrue(s->strict),
   1485                 next_idx_ptr);
   1486         case '{':
   1487             /* object */
   1488             if (Py_EnterRecursiveCall(" while decoding a JSON object "
   1489                                       "from a byte string"))
   1490                 return NULL;
   1491             res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
   1492             Py_LeaveRecursiveCall();
   1493             return res;
   1494         case '[':
   1495             /* array */
   1496             if (Py_EnterRecursiveCall(" while decoding a JSON array "
   1497                                       "from a byte string"))
   1498                 return NULL;
   1499             res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
   1500             Py_LeaveRecursiveCall();
   1501             return res;
   1502         case 'n':
   1503             /* null */
   1504             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
   1505                 Py_INCREF(Py_None);
   1506                 *next_idx_ptr = idx + 4;
   1507                 return Py_None;
   1508             }
   1509             break;
   1510         case 't':
   1511             /* true */
   1512             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
   1513                 Py_INCREF(Py_True);
   1514                 *next_idx_ptr = idx + 4;
   1515                 return Py_True;
   1516             }
   1517             break;
   1518         case 'f':
   1519             /* false */
   1520             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
   1521                 Py_INCREF(Py_False);
   1522                 *next_idx_ptr = idx + 5;
   1523                 return Py_False;
   1524             }
   1525             break;
   1526         case 'N':
   1527             /* NaN */
   1528             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
   1529                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
   1530             }
   1531             break;
   1532         case 'I':
   1533             /* Infinity */
   1534             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
   1535                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
   1536             }
   1537             break;
   1538         case '-':
   1539             /* -Infinity */
   1540             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
   1541                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
   1542             }
   1543             break;
   1544     }
   1545     /* Didn't find a string, object, array, or named constant. Look for a number. */
   1546     return _match_number_str(s, pystr, idx, next_idx_ptr);
   1547 }
   1548 
   1549 static PyObject *
   1550 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
   1551 {
   1552     /* Read one JSON term (of any kind) from PyUnicode pystr.
   1553     idx is the index of the first character of the term
   1554     *next_idx_ptr is a return-by-reference index to the first character after
   1555         the number.
   1556 
   1557     Returns a new PyObject representation of the term.
   1558     */
   1559     PyObject *res;
   1560     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
   1561     Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
   1562     if (idx < 0) {
   1563         PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
   1564         return NULL;
   1565     }
   1566     if (idx >= length) {
   1567         PyErr_SetNone(PyExc_StopIteration);
   1568         return NULL;
   1569     }
   1570     switch (str[idx]) {
   1571         case '"':
   1572             /* string */
   1573             return scanstring_unicode(pystr, idx + 1,
   1574                 PyObject_IsTrue(s->strict),
   1575                 next_idx_ptr);
   1576         case '{':
   1577             /* object */
   1578             if (Py_EnterRecursiveCall(" while decoding a JSON object "
   1579                                       "from a unicode string"))
   1580                 return NULL;
   1581             res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
   1582             Py_LeaveRecursiveCall();
   1583             return res;
   1584         case '[':
   1585             /* array */
   1586             if (Py_EnterRecursiveCall(" while decoding a JSON array "
   1587                                       "from a unicode string"))
   1588                 return NULL;
   1589             res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
   1590             Py_LeaveRecursiveCall();
   1591             return res;
   1592         case 'n':
   1593             /* null */
   1594             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
   1595                 Py_INCREF(Py_None);
   1596                 *next_idx_ptr = idx + 4;
   1597                 return Py_None;
   1598             }
   1599             break;
   1600         case 't':
   1601             /* true */
   1602             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
   1603                 Py_INCREF(Py_True);
   1604                 *next_idx_ptr = idx + 4;
   1605                 return Py_True;
   1606             }
   1607             break;
   1608         case 'f':
   1609             /* false */
   1610             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
   1611                 Py_INCREF(Py_False);
   1612                 *next_idx_ptr = idx + 5;
   1613                 return Py_False;
   1614             }
   1615             break;
   1616         case 'N':
   1617             /* NaN */
   1618             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
   1619                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
   1620             }
   1621             break;
   1622         case 'I':
   1623             /* Infinity */
   1624             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
   1625                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
   1626             }
   1627             break;
   1628         case '-':
   1629             /* -Infinity */
   1630             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
   1631                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
   1632             }
   1633             break;
   1634     }
   1635     /* Didn't find a string, object, array, or named constant. Look for a number. */
   1636     return _match_number_unicode(s, pystr, idx, next_idx_ptr);
   1637 }
   1638 
   1639 static PyObject *
   1640 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
   1641 {
   1642     /* Python callable interface to scan_once_{str,unicode} */
   1643     PyObject *pystr;
   1644     PyObject *rval;
   1645     Py_ssize_t idx;
   1646     Py_ssize_t next_idx = -1;
   1647     static char *kwlist[] = {"string", "idx", NULL};
   1648     PyScannerObject *s;
   1649     assert(PyScanner_Check(self));
   1650     s = (PyScannerObject *)self;
   1651     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
   1652         return NULL;
   1653 
   1654     if (PyString_Check(pystr)) {
   1655         rval = scan_once_str(s, pystr, idx, &next_idx);
   1656     }
   1657     else if (PyUnicode_Check(pystr)) {
   1658         rval = scan_once_unicode(s, pystr, idx, &next_idx);
   1659     }
   1660     else {
   1661         PyErr_Format(PyExc_TypeError,
   1662                  "first argument must be a string, not %.80s",
   1663                  Py_TYPE(pystr)->tp_name);
   1664         return NULL;
   1665     }
   1666     return _build_rval_index_tuple(rval, next_idx);
   1667 }
   1668 
   1669 static PyObject *
   1670 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   1671 {
   1672     PyScannerObject *s;
   1673     s = (PyScannerObject *)type->tp_alloc(type, 0);
   1674     if (s != NULL) {
   1675         s->encoding = NULL;
   1676         s->strict = NULL;
   1677         s->object_hook = NULL;
   1678         s->pairs_hook = NULL;
   1679         s->parse_float = NULL;
   1680         s->parse_int = NULL;
   1681         s->parse_constant = NULL;
   1682     }
   1683     return (PyObject *)s;
   1684 }
   1685 
   1686 static int
   1687 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
   1688 {
   1689     /* Initialize Scanner object */
   1690     PyObject *ctx;
   1691     static char *kwlist[] = {"context", NULL};
   1692     PyScannerObject *s;
   1693 
   1694     assert(PyScanner_Check(self));
   1695     s = (PyScannerObject *)self;
   1696 
   1697     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
   1698         return -1;
   1699 
   1700     /* PyString_AS_STRING is used on encoding */
   1701     s->encoding = PyObject_GetAttrString(ctx, "encoding");
   1702     if (s->encoding == NULL)
   1703         goto bail;
   1704     if (s->encoding == Py_None) {
   1705         Py_DECREF(Py_None);
   1706         s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
   1707     }
   1708     else if (PyUnicode_Check(s->encoding)) {
   1709         PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
   1710         Py_DECREF(s->encoding);
   1711         s->encoding = tmp;
   1712     }
   1713     if (s->encoding == NULL)
   1714         goto bail;
   1715     if (!PyString_Check(s->encoding)) {
   1716 	PyErr_Format(PyExc_TypeError,
   1717 		     "encoding must be a string, not %.80s",
   1718 		     Py_TYPE(s->encoding)->tp_name);
   1719 	goto bail;
   1720     }
   1721 
   1722 
   1723     /* All of these will fail "gracefully" so we don't need to verify them */
   1724     s->strict = PyObject_GetAttrString(ctx, "strict");
   1725     if (s->strict == NULL)
   1726         goto bail;
   1727     s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
   1728     if (s->object_hook == NULL)
   1729         goto bail;
   1730     s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
   1731     if (s->pairs_hook == NULL)
   1732         goto bail;
   1733     s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
   1734     if (s->parse_float == NULL)
   1735         goto bail;
   1736     s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
   1737     if (s->parse_int == NULL)
   1738         goto bail;
   1739     s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
   1740     if (s->parse_constant == NULL)
   1741         goto bail;
   1742 
   1743     return 0;
   1744 
   1745 bail:
   1746     Py_CLEAR(s->encoding);
   1747     Py_CLEAR(s->strict);
   1748     Py_CLEAR(s->object_hook);
   1749     Py_CLEAR(s->pairs_hook);
   1750     Py_CLEAR(s->parse_float);
   1751     Py_CLEAR(s->parse_int);
   1752     Py_CLEAR(s->parse_constant);
   1753     return -1;
   1754 }
   1755 
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Type object for _json.Scanner.
   Instances are callable (tp_call = scanner_call), take part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with scanner_traverse and
   scanner_clear) and expose their fields read-only through scanner_members.
   Slots left as 0 are not set here; the commented-out names next to some of
   them record the function that was considered for that slot. */
static
PyTypeObject PyScannerType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* ob_size */
    "_json.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
   1801 
   1802 static PyObject *
   1803 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
   1804 {
   1805     PyEncoderObject *s;
   1806     s = (PyEncoderObject *)type->tp_alloc(type, 0);
   1807     if (s != NULL) {
   1808         s->markers = NULL;
   1809         s->defaultfn = NULL;
   1810         s->encoder = NULL;
   1811         s->indent = NULL;
   1812         s->key_separator = NULL;
   1813         s->item_separator = NULL;
   1814         s->sort_keys = NULL;
   1815         s->skipkeys = NULL;
   1816     }
   1817     return (PyObject *)s;
   1818 }
   1819 
   1820 static int
   1821 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
   1822 {
   1823     /* initialize Encoder object */
   1824     static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
   1825 
   1826     PyEncoderObject *s;
   1827     PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
   1828     PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
   1829 
   1830     assert(PyEncoder_Check(self));
   1831     s = (PyEncoderObject *)self;
   1832 
   1833     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
   1834         &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
   1835         &sort_keys, &skipkeys, &allow_nan))
   1836         return -1;
   1837 
   1838     s->markers = markers;
   1839     s->defaultfn = defaultfn;
   1840     s->encoder = encoder;
   1841     s->indent = indent;
   1842     s->key_separator = key_separator;
   1843     s->item_separator = item_separator;
   1844     s->sort_keys = sort_keys;
   1845     s->skipkeys = skipkeys;
   1846     s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
   1847     s->allow_nan = PyObject_IsTrue(allow_nan);
   1848 
   1849     Py_INCREF(s->markers);
   1850     Py_INCREF(s->defaultfn);
   1851     Py_INCREF(s->encoder);
   1852     Py_INCREF(s->indent);
   1853     Py_INCREF(s->key_separator);
   1854     Py_INCREF(s->item_separator);
   1855     Py_INCREF(s->sort_keys);
   1856     Py_INCREF(s->skipkeys);
   1857     return 0;
   1858 }
   1859 
   1860 static PyObject *
   1861 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
   1862 {
   1863     /* Python callable interface to encode_listencode_obj */
   1864     static char *kwlist[] = {"obj", "_current_indent_level", NULL};
   1865     PyObject *obj;
   1866     PyObject *rval;
   1867     Py_ssize_t indent_level;
   1868     PyEncoderObject *s;
   1869     assert(PyEncoder_Check(self));
   1870     s = (PyEncoderObject *)self;
   1871     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
   1872         &obj, _convertPyInt_AsSsize_t, &indent_level))
   1873         return NULL;
   1874     rval = PyList_New(0);
   1875     if (rval == NULL)
   1876         return NULL;
   1877     if (encoder_listencode_obj(s, rval, obj, indent_level)) {
   1878         Py_DECREF(rval);
   1879         return NULL;
   1880     }
   1881     return rval;
   1882 }
   1883 
   1884 static PyObject *
   1885 _encoded_const(PyObject *obj)
   1886 {
   1887     /* Return the JSON string representation of None, True, False */
   1888     if (obj == Py_None) {
   1889         static PyObject *s_null = NULL;
   1890         if (s_null == NULL) {
   1891             s_null = PyString_InternFromString("null");
   1892         }
   1893         Py_INCREF(s_null);
   1894         return s_null;
   1895     }
   1896     else if (obj == Py_True) {
   1897         static PyObject *s_true = NULL;
   1898         if (s_true == NULL) {
   1899             s_true = PyString_InternFromString("true");
   1900         }
   1901         Py_INCREF(s_true);
   1902         return s_true;
   1903     }
   1904     else if (obj == Py_False) {
   1905         static PyObject *s_false = NULL;
   1906         if (s_false == NULL) {
   1907             s_false = PyString_InternFromString("false");
   1908         }
   1909         Py_INCREF(s_false);
   1910         return s_false;
   1911     }
   1912     else {
   1913         PyErr_SetString(PyExc_ValueError, "not a const");
   1914         return NULL;
   1915     }
   1916 }
   1917 
   1918 static PyObject *
   1919 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
   1920 {
   1921     /* Return the JSON representation of a PyFloat */
   1922     double i = PyFloat_AS_DOUBLE(obj);
   1923     if (!Py_IS_FINITE(i)) {
   1924         if (!s->allow_nan) {
   1925             PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
   1926             return NULL;
   1927         }
   1928         if (i > 0) {
   1929             return PyString_FromString("Infinity");
   1930         }
   1931         else if (i < 0) {
   1932             return PyString_FromString("-Infinity");
   1933         }
   1934         else {
   1935             return PyString_FromString("NaN");
   1936         }
   1937     }
   1938     /* Use a better float format here? */
   1939     return PyObject_Repr(obj);
   1940 }
   1941 
   1942 static PyObject *
   1943 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
   1944 {
   1945     /* Return the JSON representation of a string */
   1946     if (s->fast_encode)
   1947         return py_encode_basestring_ascii(NULL, obj);
   1948     else
   1949         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
   1950 }
   1951 
   1952 static int
   1953 _steal_list_append(PyObject *lst, PyObject *stolen)
   1954 {
   1955     /* Append stolen and then decrement its reference count */
   1956     int rval = PyList_Append(lst, stolen);
   1957     Py_DECREF(stolen);
   1958     return rval;
   1959 }
   1960 
   1961 static int
   1962 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
   1963 {
   1964     /* Encode Python object obj to a JSON term, rval is a PyList */
   1965     PyObject *newobj;
   1966     int rv;
   1967 
   1968     if (obj == Py_None || obj == Py_True || obj == Py_False) {
   1969         PyObject *cstr = _encoded_const(obj);
   1970         if (cstr == NULL)
   1971             return -1;
   1972         return _steal_list_append(rval, cstr);
   1973     }
   1974     else if (PyString_Check(obj) || PyUnicode_Check(obj))
   1975     {
   1976         PyObject *encoded = encoder_encode_string(s, obj);
   1977         if (encoded == NULL)
   1978             return -1;
   1979         return _steal_list_append(rval, encoded);
   1980     }
   1981     else if (PyInt_Check(obj) || PyLong_Check(obj)) {
   1982         PyObject *encoded = PyObject_Str(obj);
   1983         if (encoded == NULL)
   1984             return -1;
   1985         return _steal_list_append(rval, encoded);
   1986     }
   1987     else if (PyFloat_Check(obj)) {
   1988         PyObject *encoded = encoder_encode_float(s, obj);
   1989         if (encoded == NULL)
   1990             return -1;
   1991         return _steal_list_append(rval, encoded);
   1992     }
   1993     else if (PyList_Check(obj) || PyTuple_Check(obj)) {
   1994         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
   1995             return -1;
   1996         rv = encoder_listencode_list(s, rval, obj, indent_level);
   1997         Py_LeaveRecursiveCall();
   1998         return rv;
   1999     }
   2000     else if (PyDict_Check(obj)) {
   2001         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
   2002             return -1;
   2003         rv = encoder_listencode_dict(s, rval, obj, indent_level);
   2004         Py_LeaveRecursiveCall();
   2005         return rv;
   2006     }
   2007     else {
   2008         PyObject *ident = NULL;
   2009         if (s->markers != Py_None) {
   2010             int has_key;
   2011             ident = PyLong_FromVoidPtr(obj);
   2012             if (ident == NULL)
   2013                 return -1;
   2014             has_key = PyDict_Contains(s->markers, ident);
   2015             if (has_key) {
   2016                 if (has_key != -1)
   2017                     PyErr_SetString(PyExc_ValueError, "Circular reference detected");
   2018                 Py_DECREF(ident);
   2019                 return -1;
   2020             }
   2021             if (PyDict_SetItem(s->markers, ident, obj)) {
   2022                 Py_DECREF(ident);
   2023                 return -1;
   2024             }
   2025         }
   2026         newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
   2027         if (newobj == NULL) {
   2028             Py_XDECREF(ident);
   2029             return -1;
   2030         }
   2031 
   2032         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
   2033             return -1;
   2034         rv = encoder_listencode_obj(s, rval, newobj, indent_level);
   2035         Py_LeaveRecursiveCall();
   2036 
   2037         Py_DECREF(newobj);
   2038         if (rv) {
   2039             Py_XDECREF(ident);
   2040             return -1;
   2041         }
   2042         if (ident != NULL) {
   2043             if (PyDict_DelItem(s->markers, ident)) {
   2044                 Py_XDECREF(ident);
   2045                 return -1;
   2046             }
   2047             Py_XDECREF(ident);
   2048         }
   2049         return rv;
   2050     }
   2051 }
   2052 
   2053 static int
   2054 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
   2055 {
   2056     /* Encode Python dict dct a JSON term, rval is a PyList */
   2057     static PyObject *open_dict = NULL;
   2058     static PyObject *close_dict = NULL;
   2059     static PyObject *empty_dict = NULL;
   2060     PyObject *kstr = NULL;
   2061     PyObject *ident = NULL;
   2062     PyObject *key = NULL;
   2063     PyObject *value = NULL;
   2064     PyObject *it = NULL;
   2065     int skipkeys;
   2066     Py_ssize_t idx;
   2067 
   2068     if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
   2069         open_dict = PyString_InternFromString("{");
   2070         close_dict = PyString_InternFromString("}");
   2071         empty_dict = PyString_InternFromString("{}");
   2072         if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
   2073             return -1;
   2074     }
   2075     if (Py_SIZE(dct) == 0)
   2076         return PyList_Append(rval, empty_dict);
   2077 
   2078     if (s->markers != Py_None) {
   2079         int has_key;
   2080         ident = PyLong_FromVoidPtr(dct);
   2081         if (ident == NULL)
   2082             goto bail;
   2083         has_key = PyDict_Contains(s->markers, ident);
   2084         if (has_key) {
   2085             if (has_key != -1)
   2086                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
   2087             goto bail;
   2088         }
   2089         if (PyDict_SetItem(s->markers, ident, dct)) {
   2090             goto bail;
   2091         }
   2092     }
   2093 
   2094     if (PyList_Append(rval, open_dict))
   2095         goto bail;
   2096 
   2097     if (s->indent != Py_None) {
   2098         /* TODO: DOES NOT RUN */
   2099         indent_level += 1;
   2100         /*
   2101             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
   2102             separator = _item_separator + newline_indent
   2103             buf += newline_indent
   2104         */
   2105     }
   2106 
   2107     /* TODO: C speedup not implemented for sort_keys */
   2108 
   2109     it = PyObject_GetIter(dct);
   2110     if (it == NULL)
   2111         goto bail;
   2112     skipkeys = PyObject_IsTrue(s->skipkeys);
   2113     idx = 0;
   2114     while ((key = PyIter_Next(it)) != NULL) {
   2115         PyObject *encoded;
   2116 
   2117         if (PyString_Check(key) || PyUnicode_Check(key)) {
   2118             Py_INCREF(key);
   2119             kstr = key;
   2120         }
   2121         else if (PyFloat_Check(key)) {
   2122             kstr = encoder_encode_float(s, key);
   2123             if (kstr == NULL)
   2124                 goto bail;
   2125         }
   2126         else if (PyInt_Check(key) || PyLong_Check(key)) {
   2127             kstr = PyObject_Str(key);
   2128             if (kstr == NULL)
   2129                 goto bail;
   2130         }
   2131         else if (key == Py_True || key == Py_False || key == Py_None) {
   2132             kstr = _encoded_const(key);
   2133             if (kstr == NULL)
   2134                 goto bail;
   2135         }
   2136         else if (skipkeys) {
   2137             Py_DECREF(key);
   2138             continue;
   2139         }
   2140         else {
   2141             /* TODO: include repr of key */
   2142             PyErr_SetString(PyExc_TypeError, "keys must be a string");
   2143             goto bail;
   2144         }
   2145 
   2146         if (idx) {
   2147             if (PyList_Append(rval, s->item_separator))
   2148                 goto bail;
   2149         }
   2150 
   2151         value = PyObject_GetItem(dct, key);
   2152         if (value == NULL)
   2153             goto bail;
   2154 
   2155         encoded = encoder_encode_string(s, kstr);
   2156         Py_CLEAR(kstr);
   2157         if (encoded == NULL)
   2158             goto bail;
   2159         if (PyList_Append(rval, encoded)) {
   2160             Py_DECREF(encoded);
   2161             goto bail;
   2162         }
   2163         Py_DECREF(encoded);
   2164         if (PyList_Append(rval, s->key_separator))
   2165             goto bail;
   2166         if (encoder_listencode_obj(s, rval, value, indent_level))
   2167             goto bail;
   2168         idx += 1;
   2169         Py_CLEAR(value);
   2170         Py_DECREF(key);
   2171     }
   2172     if (PyErr_Occurred())
   2173         goto bail;
   2174     Py_CLEAR(it);
   2175 
   2176     if (ident != NULL) {
   2177         if (PyDict_DelItem(s->markers, ident))
   2178             goto bail;
   2179         Py_CLEAR(ident);
   2180     }
   2181     if (s->indent != Py_None) {
   2182         /* TODO: DOES NOT RUN */
   2183         /*
   2184             indent_level -= 1;
   2185 
   2186             yield '\n' + (' ' * (_indent * _current_indent_level))
   2187         */
   2188     }
   2189     if (PyList_Append(rval, close_dict))
   2190         goto bail;
   2191     return 0;
   2192 
   2193 bail:
   2194     Py_XDECREF(it);
   2195     Py_XDECREF(key);
   2196     Py_XDECREF(value);
   2197     Py_XDECREF(kstr);
   2198     Py_XDECREF(ident);
   2199     return -1;
   2200 }
   2201 
   2202 
   2203 static int
   2204 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
   2205 {
   2206     /* Encode Python list seq to a JSON term, rval is a PyList */
   2207     static PyObject *open_array = NULL;
   2208     static PyObject *close_array = NULL;
   2209     static PyObject *empty_array = NULL;
   2210     PyObject *ident = NULL;
   2211     PyObject *s_fast = NULL;
   2212     Py_ssize_t i;
   2213 
   2214     if (open_array == NULL || close_array == NULL || empty_array == NULL) {
   2215         open_array = PyString_InternFromString("[");
   2216         close_array = PyString_InternFromString("]");
   2217         empty_array = PyString_InternFromString("[]");
   2218         if (open_array == NULL || close_array == NULL || empty_array == NULL)
   2219             return -1;
   2220     }
   2221     ident = NULL;
   2222     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
   2223     if (s_fast == NULL)
   2224         return -1;
   2225     if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
   2226         Py_DECREF(s_fast);
   2227         return PyList_Append(rval, empty_array);
   2228     }
   2229 
   2230     if (s->markers != Py_None) {
   2231         int has_key;
   2232         ident = PyLong_FromVoidPtr(seq);
   2233         if (ident == NULL)
   2234             goto bail;
   2235         has_key = PyDict_Contains(s->markers, ident);
   2236         if (has_key) {
   2237             if (has_key != -1)
   2238                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
   2239             goto bail;
   2240         }
   2241         if (PyDict_SetItem(s->markers, ident, seq)) {
   2242             goto bail;
   2243         }
   2244     }
   2245 
   2246     if (PyList_Append(rval, open_array))
   2247         goto bail;
   2248     if (s->indent != Py_None) {
   2249         /* TODO: DOES NOT RUN */
   2250         indent_level += 1;
   2251         /*
   2252             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
   2253             separator = _item_separator + newline_indent
   2254             buf += newline_indent
   2255         */
   2256     }
   2257     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
   2258         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
   2259         if (i) {
   2260             if (PyList_Append(rval, s->item_separator))
   2261                 goto bail;
   2262         }
   2263         if (encoder_listencode_obj(s, rval, obj, indent_level))
   2264             goto bail;
   2265     }
   2266     if (ident != NULL) {
   2267         if (PyDict_DelItem(s->markers, ident))
   2268             goto bail;
   2269         Py_CLEAR(ident);
   2270     }
   2271     if (s->indent != Py_None) {
   2272         /* TODO: DOES NOT RUN */
   2273         /*
   2274             indent_level -= 1;
   2275 
   2276             yield '\n' + (' ' * (_indent * _current_indent_level))
   2277         */
   2278     }
   2279     if (PyList_Append(rval, close_array))
   2280         goto bail;
   2281     Py_DECREF(s_fast);
   2282     return 0;
   2283 
   2284 bail:
   2285     Py_XDECREF(ident);
   2286     Py_DECREF(s_fast);
   2287     return -1;
   2288 }
   2289 
   2290 static void
   2291 encoder_dealloc(PyObject *self)
   2292 {
   2293     /* Deallocate Encoder */
   2294     encoder_clear(self);
   2295     Py_TYPE(self)->tp_free(self);
   2296 }
   2297 
   2298 static int
   2299 encoder_traverse(PyObject *self, visitproc visit, void *arg)
   2300 {
   2301     PyEncoderObject *s;
   2302     assert(PyEncoder_Check(self));
   2303     s = (PyEncoderObject *)self;
   2304     Py_VISIT(s->markers);
   2305     Py_VISIT(s->defaultfn);
   2306     Py_VISIT(s->encoder);
   2307     Py_VISIT(s->indent);
   2308     Py_VISIT(s->key_separator);
   2309     Py_VISIT(s->item_separator);
   2310     Py_VISIT(s->sort_keys);
   2311     Py_VISIT(s->skipkeys);
   2312     return 0;
   2313 }
   2314 
   2315 static int
   2316 encoder_clear(PyObject *self)
   2317 {
   2318     /* Deallocate Encoder */
   2319     PyEncoderObject *s;
   2320     assert(PyEncoder_Check(self));
   2321     s = (PyEncoderObject *)self;
   2322     Py_CLEAR(s->markers);
   2323     Py_CLEAR(s->defaultfn);
   2324     Py_CLEAR(s->encoder);
   2325     Py_CLEAR(s->indent);
   2326     Py_CLEAR(s->key_separator);
   2327     Py_CLEAR(s->item_separator);
   2328     Py_CLEAR(s->sort_keys);
   2329     Py_CLEAR(s->skipkeys);
   2330     return 0;
   2331 }
   2332 
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/* Type object for _json.Encoder.
   Instances are callable (tp_call = encoder_call), take part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with encoder_traverse and
   encoder_clear) and expose their fields through encoder_members.
   Slots left as 0 are not set here. */
static
PyTypeObject PyEncoderType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* ob_size */
    "_json.Encoder",       /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    encoder_call,         /* tp_call */
    0,                    /* tp_str */
    0,                    /* tp_getattro */
    0,                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    encoder_doc,          /* tp_doc */
    encoder_traverse,     /* tp_traverse */
    encoder_clear,        /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    encoder_members,      /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    encoder_init,         /* tp_init */
    0,                    /* tp_alloc */
    encoder_new,          /* tp_new */
    0,                    /* tp_free */
};
   2378 
/* Module-level functions exported by _json; passed to Py_InitModule3 in
   init_json.  The table ends with an all-NULL sentinel entry. */
static PyMethodDef speedups_methods[] = {
    /* METH_O: called with exactly one object argument */
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    /* METH_VARARGS: called with a tuple of positional arguments */
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}
};
   2390 
   2391 PyDoc_STRVAR(module_doc,
   2392 "json speedups\n");
   2393 
   2394 void
   2395 init_json(void)
   2396 {
   2397     PyObject *m;
   2398     PyScannerType.tp_new = PyType_GenericNew;
   2399     if (PyType_Ready(&PyScannerType) < 0)
   2400         return;
   2401     PyEncoderType.tp_new = PyType_GenericNew;
   2402     if (PyType_Ready(&PyEncoderType) < 0)
   2403         return;
   2404     m = Py_InitModule3("_json", speedups_methods, module_doc);
   2405     Py_INCREF((PyObject*)&PyScannerType);
   2406     PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
   2407     Py_INCREF((PyObject*)&PyEncoderType);
   2408     PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
   2409 }
   2410