Home | History | Annotate | Download | only in json
      1 """Implementation of JSONEncoder
      2 """
      3 import re
      4 
      5 try:
      6     from _json import encode_basestring_ascii as c_encode_basestring_ascii
      7 except ImportError:
      8     c_encode_basestring_ascii = None
      9 try:
     10     from _json import make_encoder as c_make_encoder
     11 except ImportError:
     12     c_make_encoder = None
     13 
     14 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
     15 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
     16 HAS_UTF8 = re.compile(r'[\x80-\xff]')
     17 ESCAPE_DCT = {
     18     '\\': '\\\\',
     19     '"': '\\"',
     20     '\b': '\\b',
     21     '\f': '\\f',
     22     '\n': '\\n',
     23     '\r': '\\r',
     24     '\t': '\\t',
     25 }
     26 for i in range(0x20):
     27     ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
     28     #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
     29 
     30 # Assume this produces an infinity on all machines (probably not guaranteed)
     31 INFINITY = float('1e66666')
     32 FLOAT_REPR = repr
     33 
     34 def encode_basestring(s):
     35     """Return a JSON representation of a Python string
     36 
     37     """
     38     def replace(match):
     39         return ESCAPE_DCT[match.group(0)]
     40     return '"' + ESCAPE.sub(replace, s) + '"'
     41 
     42 
     43 def py_encode_basestring_ascii(s):
     44     """Return an ASCII-only JSON representation of a Python string
     45 
     46     """
     47     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
     48         s = s.decode('utf-8')
     49     def replace(match):
     50         s = match.group(0)
     51         try:
     52             return ESCAPE_DCT[s]
     53         except KeyError:
     54             n = ord(s)
     55             if n < 0x10000:
     56                 return '\\u{0:04x}'.format(n)
     57                 #return '\\u%04x' % (n,)
     58             else:
     59                 # surrogate pair
     60                 n -= 0x10000
     61                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
     62                 s2 = 0xdc00 | (n & 0x3ff)
     63                 return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
     64                 #return '\\u%04x\\u%04x' % (s1, s2)
     65     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
     66 
     67 
     68 encode_basestring_ascii = (
     69     c_encode_basestring_ascii or py_encode_basestring_ascii)
     70 
     71 class JSONEncoder(object):
     72     """Extensible JSON <http://json.org> encoder for Python data structures.
     73 
     74     Supports the following objects and types by default:
     75 
     76     +-------------------+---------------+
     77     | Python            | JSON          |
     78     +===================+===============+
     79     | dict              | object        |
     80     +-------------------+---------------+
     81     | list, tuple       | array         |
     82     +-------------------+---------------+
     83     | str, unicode      | string        |
     84     +-------------------+---------------+
     85     | int, long, float  | number        |
     86     +-------------------+---------------+
     87     | True              | true          |
     88     +-------------------+---------------+
     89     | False             | false         |
     90     +-------------------+---------------+
     91     | None              | null          |
     92     +-------------------+---------------+
     93 
     94     To extend this to recognize other objects, subclass and implement a
     95     ``.default()`` method with another method that returns a serializable
     96     object for ``o`` if possible, otherwise it should call the superclass
     97     implementation (to raise ``TypeError``).
     98 
     99     """
    100     item_separator = ', '
    101     key_separator = ': '
    102     def __init__(self, skipkeys=False, ensure_ascii=True,
    103             check_circular=True, allow_nan=True, sort_keys=False,
    104             indent=None, separators=None, encoding='utf-8', default=None):
    105         """Constructor for JSONEncoder, with sensible defaults.
    106 
    107         If skipkeys is false, then it is a TypeError to attempt
    108         encoding of keys that are not str, int, long, float or None.  If
    109         skipkeys is True, such items are simply skipped.
    110 
    111         If ensure_ascii is true, the output is guaranteed to be str
    112         objects with all incoming unicode characters escaped.  If
    113         ensure_ascii is false, the output will be unicode object.
    114 
    115         If check_circular is true, then lists, dicts, and custom encoded
    116         objects will be checked for circular references during encoding to
    117         prevent an infinite recursion (which would cause an OverflowError).
    118         Otherwise, no such check takes place.
    119 
    120         If allow_nan is true, then NaN, Infinity, and -Infinity will be
    121         encoded as such.  This behavior is not JSON specification compliant,
    122         but is consistent with most JavaScript based encoders and decoders.
    123         Otherwise, it will be a ValueError to encode such floats.
    124 
    125         If sort_keys is true, then the output of dictionaries will be
    126         sorted by key; this is useful for regression tests to ensure
    127         that JSON serializations can be compared on a day-to-day basis.
    128 
    129         If indent is a non-negative integer, then JSON array
    130         elements and object members will be pretty-printed with that
    131         indent level.  An indent level of 0 will only insert newlines.
    132         None is the most compact representation.
    133 
    134         If specified, separators should be a (item_separator, key_separator)
    135         tuple.  The default is (', ', ': ').  To get the most compact JSON
    136         representation you should specify (',', ':') to eliminate whitespace.
    137 
    138         If specified, default is a function that gets called for objects
    139         that can't otherwise be serialized.  It should return a JSON encodable
    140         version of the object or raise a ``TypeError``.
    141 
    142         If encoding is not None, then all input strings will be
    143         transformed into unicode using that encoding prior to JSON-encoding.
    144         The default is UTF-8.
    145 
    146         """
    147 
    148         self.skipkeys = skipkeys
    149         self.ensure_ascii = ensure_ascii
    150         self.check_circular = check_circular
    151         self.allow_nan = allow_nan
    152         self.sort_keys = sort_keys
    153         self.indent = indent
    154         if separators is not None:
    155             self.item_separator, self.key_separator = separators
    156         if default is not None:
    157             self.default = default
    158         self.encoding = encoding
    159 
    160     def default(self, o):
    161         """Implement this method in a subclass such that it returns
    162         a serializable object for ``o``, or calls the base implementation
    163         (to raise a ``TypeError``).
    164 
    165         For example, to support arbitrary iterators, you could
    166         implement default like this::
    167 
    168             def default(self, o):
    169                 try:
    170                     iterable = iter(o)
    171                 except TypeError:
    172                     pass
    173                 else:
    174                     return list(iterable)
    175                 return JSONEncoder.default(self, o)
    176 
    177         """
    178         raise TypeError(repr(o) + " is not JSON serializable")
    179 
    180     def encode(self, o):
    181         """Return a JSON string representation of a Python data structure.
    182 
    183         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
    184         '{"foo": ["bar", "baz"]}'
    185 
    186         """
    187         # This is for extremely simple cases and benchmarks.
    188         if isinstance(o, basestring):
    189             if isinstance(o, str):
    190                 _encoding = self.encoding
    191                 if (_encoding is not None
    192                         and not (_encoding == 'utf-8')):
    193                     o = o.decode(_encoding)
    194             if self.ensure_ascii:
    195                 return encode_basestring_ascii(o)
    196             else:
    197                 return encode_basestring(o)
    198         # This doesn't pass the iterator directly to ''.join() because the
    199         # exceptions aren't as detailed.  The list call should be roughly
    200         # equivalent to the PySequence_Fast that ''.join() would do.
    201         chunks = self.iterencode(o, _one_shot=True)
    202         if not isinstance(chunks, (list, tuple)):
    203             chunks = list(chunks)
    204         return ''.join(chunks)
    205 
    206     def iterencode(self, o, _one_shot=False):
    207         """Encode the given object and yield each string
    208         representation as available.
    209 
    210         For example::
    211 
    212             for chunk in JSONEncoder().iterencode(bigobject):
    213                 mysocket.write(chunk)
    214 
    215         """
    216         if self.check_circular:
    217             markers = {}
    218         else:
    219             markers = None
    220         if self.ensure_ascii:
    221             _encoder = encode_basestring_ascii
    222         else:
    223             _encoder = encode_basestring
    224         if self.encoding != 'utf-8':
    225             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
    226                 if isinstance(o, str):
    227                     o = o.decode(_encoding)
    228                 return _orig_encoder(o)
    229 
    230         def floatstr(o, allow_nan=self.allow_nan,
    231                 _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
    232             # Check for specials.  Note that this type of test is processor
    233             # and/or platform-specific, so do tests which don't depend on the
    234             # internals.
    235 
    236             if o != o:
    237                 text = 'NaN'
    238             elif o == _inf:
    239                 text = 'Infinity'
    240             elif o == _neginf:
    241                 text = '-Infinity'
    242             else:
    243                 return _repr(o)
    244 
    245             if not allow_nan:
    246                 raise ValueError(
    247                     "Out of range float values are not JSON compliant: " +
    248                     repr(o))
    249 
    250             return text
    251 
    252 
    253         if (_one_shot and c_make_encoder is not None
    254                 and self.indent is None and not self.sort_keys):
    255             _iterencode = c_make_encoder(
    256                 markers, self.default, _encoder, self.indent,
    257                 self.key_separator, self.item_separator, self.sort_keys,
    258                 self.skipkeys, self.allow_nan)
    259         else:
    260             _iterencode = _make_iterencode(
    261                 markers, self.default, _encoder, self.indent, floatstr,
    262                 self.key_separator, self.item_separator, self.sort_keys,
    263                 self.skipkeys, _one_shot)
    264         return _iterencode(o, 0)
    265 
    266 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
    267         _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
    268         ## HACK: hand-optimized bytecode; turn globals into locals
    269         ValueError=ValueError,
    270         basestring=basestring,
    271         dict=dict,
    272         float=float,
    273         id=id,
    274         int=int,
    275         isinstance=isinstance,
    276         list=list,
    277         long=long,
    278         str=str,
    279         tuple=tuple,
    280     ):
    281 
    282     def _iterencode_list(lst, _current_indent_level):
    283         if not lst:
    284             yield '[]'
    285             return
    286         if markers is not None:
    287             markerid = id(lst)
    288             if markerid in markers:
    289                 raise ValueError("Circular reference detected")
    290             markers[markerid] = lst
    291         buf = '['
    292         if _indent is not None:
    293             _current_indent_level += 1
    294             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
    295             separator = _item_separator + newline_indent
    296             buf += newline_indent
    297         else:
    298             newline_indent = None
    299             separator = _item_separator
    300         first = True
    301         for value in lst:
    302             if first:
    303                 first = False
    304             else:
    305                 buf = separator
    306             if isinstance(value, basestring):
    307                 yield buf + _encoder(value)
    308             elif value is None:
    309                 yield buf + 'null'
    310             elif value is True:
    311                 yield buf + 'true'
    312             elif value is False:
    313                 yield buf + 'false'
    314             elif isinstance(value, (int, long)):
    315                 yield buf + str(value)
    316             elif isinstance(value, float):
    317                 yield buf + _floatstr(value)
    318             else:
    319                 yield buf
    320                 if isinstance(value, (list, tuple)):
    321                     chunks = _iterencode_list(value, _current_indent_level)
    322                 elif isinstance(value, dict):
    323                     chunks = _iterencode_dict(value, _current_indent_level)
    324                 else:
    325                     chunks = _iterencode(value, _current_indent_level)
    326                 for chunk in chunks:
    327                     yield chunk
    328         if newline_indent is not None:
    329             _current_indent_level -= 1
    330             yield '\n' + (' ' * (_indent * _current_indent_level))
    331         yield ']'
    332         if markers is not None:
    333             del markers[markerid]
    334 
    335     def _iterencode_dict(dct, _current_indent_level):
    336         if not dct:
    337             yield '{}'
    338             return
    339         if markers is not None:
    340             markerid = id(dct)
    341             if markerid in markers:
    342                 raise ValueError("Circular reference detected")
    343             markers[markerid] = dct
    344         yield '{'
    345         if _indent is not None:
    346             _current_indent_level += 1
    347             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
    348             item_separator = _item_separator + newline_indent
    349             yield newline_indent
    350         else:
    351             newline_indent = None
    352             item_separator = _item_separator
    353         first = True
    354         if _sort_keys:
    355             items = sorted(dct.items(), key=lambda kv: kv[0])
    356         else:
    357             items = dct.iteritems()
    358         for key, value in items:
    359             if isinstance(key, basestring):
    360                 pass
    361             # JavaScript is weakly typed for these, so it makes sense to
    362             # also allow them.  Many encoders seem to do something like this.
    363             elif isinstance(key, float):
    364                 key = _floatstr(key)
    365             elif key is True:
    366                 key = 'true'
    367             elif key is False:
    368                 key = 'false'
    369             elif key is None:
    370                 key = 'null'
    371             elif isinstance(key, (int, long)):
    372                 key = str(key)
    373             elif _skipkeys:
    374                 continue
    375             else:
    376                 raise TypeError("key " + repr(key) + " is not a string")
    377             if first:
    378                 first = False
    379             else:
    380                 yield item_separator
    381             yield _encoder(key)
    382             yield _key_separator
    383             if isinstance(value, basestring):
    384                 yield _encoder(value)
    385             elif value is None:
    386                 yield 'null'
    387             elif value is True:
    388                 yield 'true'
    389             elif value is False:
    390                 yield 'false'
    391             elif isinstance(value, (int, long)):
    392                 yield str(value)
    393             elif isinstance(value, float):
    394                 yield _floatstr(value)
    395             else:
    396                 if isinstance(value, (list, tuple)):
    397                     chunks = _iterencode_list(value, _current_indent_level)
    398                 elif isinstance(value, dict):
    399                     chunks = _iterencode_dict(value, _current_indent_level)
    400                 else:
    401                     chunks = _iterencode(value, _current_indent_level)
    402                 for chunk in chunks:
    403                     yield chunk
    404         if newline_indent is not None:
    405             _current_indent_level -= 1
    406             yield '\n' + (' ' * (_indent * _current_indent_level))
    407         yield '}'
    408         if markers is not None:
    409             del markers[markerid]
    410 
    411     def _iterencode(o, _current_indent_level):
    412         if isinstance(o, basestring):
    413             yield _encoder(o)
    414         elif o is None:
    415             yield 'null'
    416         elif o is True:
    417             yield 'true'
    418         elif o is False:
    419             yield 'false'
    420         elif isinstance(o, (int, long)):
    421             yield str(o)
    422         elif isinstance(o, float):
    423             yield _floatstr(o)
    424         elif isinstance(o, (list, tuple)):
    425             for chunk in _iterencode_list(o, _current_indent_level):
    426                 yield chunk
    427         elif isinstance(o, dict):
    428             for chunk in _iterencode_dict(o, _current_indent_level):
    429                 yield chunk
    430         else:
    431             if markers is not None:
    432                 markerid = id(o)
    433                 if markerid in markers:
    434                     raise ValueError("Circular reference detected")
    435                 markers[markerid] = o
    436             o = _default(o)
    437             for chunk in _iterencode(o, _current_indent_level):
    438                 yield chunk
    439             if markers is not None:
    440                 del markers[markerid]
    441 
    442     return _iterencode
    443