# [source-browser navigation residue, not part of the module] Home | History | Annotate | Download | only in json
      1 """Implementation of JSONEncoder
      2 """
      3 import re
      4 
      5 try:
      6     from _json import encode_basestring_ascii as c_encode_basestring_ascii
      7 except ImportError:
      8     c_encode_basestring_ascii = None
      9 try:
     10     from _json import encode_basestring as c_encode_basestring
     11 except ImportError:
     12     c_encode_basestring = None
     13 try:
     14     from _json import make_encoder as c_make_encoder
     15 except ImportError:
     16     c_make_encoder = None
     17 
     18 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
     19 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
     20 HAS_UTF8 = re.compile(b'[\x80-\xff]')
     21 ESCAPE_DCT = {
     22     '\\': '\\\\',
     23     '"': '\\"',
     24     '\b': '\\b',
     25     '\f': '\\f',
     26     '\n': '\\n',
     27     '\r': '\\r',
     28     '\t': '\\t',
     29 }
     30 for i in range(0x20):
     31     ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
     32     #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
     33 
     34 INFINITY = float('inf')
     35 
     36 def py_encode_basestring(s):
     37     """Return a JSON representation of a Python string
     38 
     39     """
     40     def replace(match):
     41         return ESCAPE_DCT[match.group(0)]
     42     return '"' + ESCAPE.sub(replace, s) + '"'
     43 
     44 
     45 encode_basestring = (c_encode_basestring or py_encode_basestring)
     46 
     47 
     48 def py_encode_basestring_ascii(s):
     49     """Return an ASCII-only JSON representation of a Python string
     50 
     51     """
     52     def replace(match):
     53         s = match.group(0)
     54         try:
     55             return ESCAPE_DCT[s]
     56         except KeyError:
     57             n = ord(s)
     58             if n < 0x10000:
     59                 return '\\u{0:04x}'.format(n)
     60                 #return '\\u%04x' % (n,)
     61             else:
     62                 # surrogate pair
     63                 n -= 0x10000
     64                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
     65                 s2 = 0xdc00 | (n & 0x3ff)
     66                 return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
     67     return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
     68 
     69 
     70 encode_basestring_ascii = (
     71     c_encode_basestring_ascii or py_encode_basestring_ascii)
     72 
     73 class JSONEncoder(object):
     74     """Extensible JSON <http://json.org> encoder for Python data structures.
     75 
     76     Supports the following objects and types by default:
     77 
     78     +-------------------+---------------+
     79     | Python            | JSON          |
     80     +===================+===============+
     81     | dict              | object        |
     82     +-------------------+---------------+
     83     | list, tuple       | array         |
     84     +-------------------+---------------+
     85     | str               | string        |
     86     +-------------------+---------------+
     87     | int, float        | number        |
     88     +-------------------+---------------+
     89     | True              | true          |
     90     +-------------------+---------------+
     91     | False             | false         |
     92     +-------------------+---------------+
     93     | None              | null          |
     94     +-------------------+---------------+
     95 
     96     To extend this to recognize other objects, subclass and implement a
     97     ``.default()`` method with another method that returns a serializable
     98     object for ``o`` if possible, otherwise it should call the superclass
     99     implementation (to raise ``TypeError``).
    100 
    101     """
    102     item_separator = ', '
    103     key_separator = ': '
    104     def __init__(self, *, skipkeys=False, ensure_ascii=True,
    105             check_circular=True, allow_nan=True, sort_keys=False,
    106             indent=None, separators=None, default=None):
    107         """Constructor for JSONEncoder, with sensible defaults.
    108 
    109         If skipkeys is false, then it is a TypeError to attempt
    110         encoding of keys that are not str, int, float or None.  If
    111         skipkeys is True, such items are simply skipped.
    112 
    113         If ensure_ascii is true, the output is guaranteed to be str
    114         objects with all incoming non-ASCII characters escaped.  If
    115         ensure_ascii is false, the output can contain non-ASCII characters.
    116 
    117         If check_circular is true, then lists, dicts, and custom encoded
    118         objects will be checked for circular references during encoding to
    119         prevent an infinite recursion (which would cause an OverflowError).
    120         Otherwise, no such check takes place.
    121 
    122         If allow_nan is true, then NaN, Infinity, and -Infinity will be
    123         encoded as such.  This behavior is not JSON specification compliant,
    124         but is consistent with most JavaScript based encoders and decoders.
    125         Otherwise, it will be a ValueError to encode such floats.
    126 
    127         If sort_keys is true, then the output of dictionaries will be
    128         sorted by key; this is useful for regression tests to ensure
    129         that JSON serializations can be compared on a day-to-day basis.
    130 
    131         If indent is a non-negative integer, then JSON array
    132         elements and object members will be pretty-printed with that
    133         indent level.  An indent level of 0 will only insert newlines.
    134         None is the most compact representation.
    135 
    136         If specified, separators should be an (item_separator, key_separator)
    137         tuple.  The default is (', ', ': ') if *indent* is ``None`` and
    138         (',', ': ') otherwise.  To get the most compact JSON representation,
    139         you should specify (',', ':') to eliminate whitespace.
    140 
    141         If specified, default is a function that gets called for objects
    142         that can't otherwise be serialized.  It should return a JSON encodable
    143         version of the object or raise a ``TypeError``.
    144 
    145         """
    146 
    147         self.skipkeys = skipkeys
    148         self.ensure_ascii = ensure_ascii
    149         self.check_circular = check_circular
    150         self.allow_nan = allow_nan
    151         self.sort_keys = sort_keys
    152         self.indent = indent
    153         if separators is not None:
    154             self.item_separator, self.key_separator = separators
    155         elif indent is not None:
    156             self.item_separator = ','
    157         if default is not None:
    158             self.default = default
    159 
    160     def default(self, o):
    161         """Implement this method in a subclass such that it returns
    162         a serializable object for ``o``, or calls the base implementation
    163         (to raise a ``TypeError``).
    164 
    165         For example, to support arbitrary iterators, you could
    166         implement default like this::
    167 
    168             def default(self, o):
    169                 try:
    170                     iterable = iter(o)
    171                 except TypeError:
    172                     pass
    173                 else:
    174                     return list(iterable)
    175                 # Let the base class default method raise the TypeError
    176                 return JSONEncoder.default(self, o)
    177 
    178         """
    179         raise TypeError(f'Object of type {o.__class__.__name__} '
    180                         f'is not JSON serializable')
    181 
    182     def encode(self, o):
    183         """Return a JSON string representation of a Python data structure.
    184 
    185         >>> from json.encoder import JSONEncoder
    186         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
    187         '{"foo": ["bar", "baz"]}'
    188 
    189         """
    190         # This is for extremely simple cases and benchmarks.
    191         if isinstance(o, str):
    192             if self.ensure_ascii:
    193                 return encode_basestring_ascii(o)
    194             else:
    195                 return encode_basestring(o)
    196         # This doesn't pass the iterator directly to ''.join() because the
    197         # exceptions aren't as detailed.  The list call should be roughly
    198         # equivalent to the PySequence_Fast that ''.join() would do.
    199         chunks = self.iterencode(o, _one_shot=True)
    200         if not isinstance(chunks, (list, tuple)):
    201             chunks = list(chunks)
    202         return ''.join(chunks)
    203 
    204     def iterencode(self, o, _one_shot=False):
    205         """Encode the given object and yield each string
    206         representation as available.
    207 
    208         For example::
    209 
    210             for chunk in JSONEncoder().iterencode(bigobject):
    211                 mysocket.write(chunk)
    212 
    213         """
    214         if self.check_circular:
    215             markers = {}
    216         else:
    217             markers = None
    218         if self.ensure_ascii:
    219             _encoder = encode_basestring_ascii
    220         else:
    221             _encoder = encode_basestring
    222 
    223         def floatstr(o, allow_nan=self.allow_nan,
    224                 _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
    225             # Check for specials.  Note that this type of test is processor
    226             # and/or platform-specific, so do tests which don't depend on the
    227             # internals.
    228 
    229             if o != o:
    230                 text = 'NaN'
    231             elif o == _inf:
    232                 text = 'Infinity'
    233             elif o == _neginf:
    234                 text = '-Infinity'
    235             else:
    236                 return _repr(o)
    237 
    238             if not allow_nan:
    239                 raise ValueError(
    240                     "Out of range float values are not JSON compliant: " +
    241                     repr(o))
    242 
    243             return text
    244 
    245 
    246         if (_one_shot and c_make_encoder is not None
    247                 and self.indent is None):
    248             _iterencode = c_make_encoder(
    249                 markers, self.default, _encoder, self.indent,
    250                 self.key_separator, self.item_separator, self.sort_keys,
    251                 self.skipkeys, self.allow_nan)
    252         else:
    253             _iterencode = _make_iterencode(
    254                 markers, self.default, _encoder, self.indent, floatstr,
    255                 self.key_separator, self.item_separator, self.sort_keys,
    256                 self.skipkeys, _one_shot)
    257         return _iterencode(o, 0)
    258 
    259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
    260         _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
    261         ## HACK: hand-optimized bytecode; turn globals into locals
    262         ValueError=ValueError,
    263         dict=dict,
    264         float=float,
    265         id=id,
    266         int=int,
    267         isinstance=isinstance,
    268         list=list,
    269         str=str,
    270         tuple=tuple,
    271         _intstr=int.__str__,
    272     ):
    273 
    274     if _indent is not None and not isinstance(_indent, str):
    275         _indent = ' ' * _indent
    276 
    277     def _iterencode_list(lst, _current_indent_level):
    278         if not lst:
    279             yield '[]'
    280             return
    281         if markers is not None:
    282             markerid = id(lst)
    283             if markerid in markers:
    284                 raise ValueError("Circular reference detected")
    285             markers[markerid] = lst
    286         buf = '['
    287         if _indent is not None:
    288             _current_indent_level += 1
    289             newline_indent = '\n' + _indent * _current_indent_level
    290             separator = _item_separator + newline_indent
    291             buf += newline_indent
    292         else:
    293             newline_indent = None
    294             separator = _item_separator
    295         first = True
    296         for value in lst:
    297             if first:
    298                 first = False
    299             else:
    300                 buf = separator
    301             if isinstance(value, str):
    302                 yield buf + _encoder(value)
    303             elif value is None:
    304                 yield buf + 'null'
    305             elif value is True:
    306                 yield buf + 'true'
    307             elif value is False:
    308                 yield buf + 'false'
    309             elif isinstance(value, int):
    310                 # Subclasses of int/float may override __str__, but we still
    311                 # want to encode them as integers/floats in JSON. One example
    312                 # within the standard library is IntEnum.
    313                 yield buf + _intstr(value)
    314             elif isinstance(value, float):
    315                 # see comment above for int
    316                 yield buf + _floatstr(value)
    317             else:
    318                 yield buf
    319                 if isinstance(value, (list, tuple)):
    320                     chunks = _iterencode_list(value, _current_indent_level)
    321                 elif isinstance(value, dict):
    322                     chunks = _iterencode_dict(value, _current_indent_level)
    323                 else:
    324                     chunks = _iterencode(value, _current_indent_level)
    325                 yield from chunks
    326         if newline_indent is not None:
    327             _current_indent_level -= 1
    328             yield '\n' + _indent * _current_indent_level
    329         yield ']'
    330         if markers is not None:
    331             del markers[markerid]
    332 
    333     def _iterencode_dict(dct, _current_indent_level):
    334         if not dct:
    335             yield '{}'
    336             return
    337         if markers is not None:
    338             markerid = id(dct)
    339             if markerid in markers:
    340                 raise ValueError("Circular reference detected")
    341             markers[markerid] = dct
    342         yield '{'
    343         if _indent is not None:
    344             _current_indent_level += 1
    345             newline_indent = '\n' + _indent * _current_indent_level
    346             item_separator = _item_separator + newline_indent
    347             yield newline_indent
    348         else:
    349             newline_indent = None
    350             item_separator = _item_separator
    351         first = True
    352         if _sort_keys:
    353             items = sorted(dct.items(), key=lambda kv: kv[0])
    354         else:
    355             items = dct.items()
    356         for key, value in items:
    357             if isinstance(key, str):
    358                 pass
    359             # JavaScript is weakly typed for these, so it makes sense to
    360             # also allow them.  Many encoders seem to do something like this.
    361             elif isinstance(key, float):
    362                 # see comment for int/float in _make_iterencode
    363                 key = _floatstr(key)
    364             elif key is True:
    365                 key = 'true'
    366             elif key is False:
    367                 key = 'false'
    368             elif key is None:
    369                 key = 'null'
    370             elif isinstance(key, int):
    371                 # see comment for int/float in _make_iterencode
    372                 key = _intstr(key)
    373             elif _skipkeys:
    374                 continue
    375             else:
    376                 raise TypeError(f'keys must be str, int, float, bool or None, '
    377                                 f'not {key.__class__.__name__}')
    378             if first:
    379                 first = False
    380             else:
    381                 yield item_separator
    382             yield _encoder(key)
    383             yield _key_separator
    384             if isinstance(value, str):
    385                 yield _encoder(value)
    386             elif value is None:
    387                 yield 'null'
    388             elif value is True:
    389                 yield 'true'
    390             elif value is False:
    391                 yield 'false'
    392             elif isinstance(value, int):
    393                 # see comment for int/float in _make_iterencode
    394                 yield _intstr(value)
    395             elif isinstance(value, float):
    396                 # see comment for int/float in _make_iterencode
    397                 yield _floatstr(value)
    398             else:
    399                 if isinstance(value, (list, tuple)):
    400                     chunks = _iterencode_list(value, _current_indent_level)
    401                 elif isinstance(value, dict):
    402                     chunks = _iterencode_dict(value, _current_indent_level)
    403                 else:
    404                     chunks = _iterencode(value, _current_indent_level)
    405                 yield from chunks
    406         if newline_indent is not None:
    407             _current_indent_level -= 1
    408             yield '\n' + _indent * _current_indent_level
    409         yield '}'
    410         if markers is not None:
    411             del markers[markerid]
    412 
    413     def _iterencode(o, _current_indent_level):
    414         if isinstance(o, str):
    415             yield _encoder(o)
    416         elif o is None:
    417             yield 'null'
    418         elif o is True:
    419             yield 'true'
    420         elif o is False:
    421             yield 'false'
    422         elif isinstance(o, int):
    423             # see comment for int/float in _make_iterencode
    424             yield _intstr(o)
    425         elif isinstance(o, float):
    426             # see comment for int/float in _make_iterencode
    427             yield _floatstr(o)
    428         elif isinstance(o, (list, tuple)):
    429             yield from _iterencode_list(o, _current_indent_level)
    430         elif isinstance(o, dict):
    431             yield from _iterencode_dict(o, _current_indent_level)
    432         else:
    433             if markers is not None:
    434                 markerid = id(o)
    435                 if markerid in markers:
    436                     raise ValueError("Circular reference detected")
    437                 markers[markerid] = o
    438             o = _default(o)
    439             yield from _iterencode(o, _current_indent_level)
    440             if markers is not None:
    441                 del markers[markerid]
    442     return _iterencode
    443