Home | History | Annotate | Download | only in json
      1 r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
      2 JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
      3 interchange format.
      4 
      5 :mod:`json` exposes an API familiar to users of the standard library
      6 :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
      7 version of the :mod:`json` library contained in Python 2.6, but maintains
      8 compatibility with Python 2.4 and Python 2.5 and (currently) has
      9 significant performance advantages, even without using the optional C
     10 extension for speedups.
     11 
     12 Encoding basic Python object hierarchies::
     13 
     14     >>> import json
     15     >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
     16     '["foo", {"bar": ["baz", null, 1.0, 2]}]'
     17     >>> print json.dumps("\"foo\bar")
     18     "\"foo\bar"
     19     >>> print json.dumps(u'\u1234')
     20     "\u1234"
     21     >>> print json.dumps('\\')
     22     "\\"
     23     >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
     24     {"a": 0, "b": 0, "c": 0}
     25     >>> from StringIO import StringIO
     26     >>> io = StringIO()
     27     >>> json.dump(['streaming API'], io)
     28     >>> io.getvalue()
     29     '["streaming API"]'
     30 
     31 Compact encoding::
     32 
     33     >>> import json
     34     >>> json.dumps([1,2,3,{'4': 5, '6': 7}], sort_keys=True, separators=(',',':'))
     35     '[1,2,3,{"4":5,"6":7}]'
     36 
     37 Pretty printing::
     38 
     39     >>> import json
     40     >>> print json.dumps({'4': 5, '6': 7}, sort_keys=True,
     41     ...                  indent=4, separators=(',', ': '))
     42     {
     43         "4": 5,
     44         "6": 7
     45     }
     46 
     47 Decoding JSON::
     48 
     49     >>> import json
     50     >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
     51     >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
     52     True
     53     >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
     54     True
     55     >>> from StringIO import StringIO
     56     >>> io = StringIO('["streaming API"]')
     57     >>> json.load(io)[0] == 'streaming API'
     58     True
     59 
     60 Specializing JSON object decoding::
     61 
     62     >>> import json
     63     >>> def as_complex(dct):
     64     ...     if '__complex__' in dct:
     65     ...         return complex(dct['real'], dct['imag'])
     66     ...     return dct
     67     ...
     68     >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
     69     ...     object_hook=as_complex)
     70     (1+2j)
     71     >>> from decimal import Decimal
     72     >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
     73     True
     74 
     75 Specializing JSON object encoding::
     76 
     77     >>> import json
     78     >>> def encode_complex(obj):
     79     ...     if isinstance(obj, complex):
     80     ...         return [obj.real, obj.imag]
     81     ...     raise TypeError(repr(obj) + " is not JSON serializable")
     82     ...
     83     >>> json.dumps(2 + 1j, default=encode_complex)
     84     '[2.0, 1.0]'
     85     >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
     86     '[2.0, 1.0]'
     87     >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
     88     '[2.0, 1.0]'
     89 
     90 
     91 Using json.tool from the shell to validate and pretty-print::
     92 
     93     $ echo '{"json":"obj"}' | python -m json.tool
     94     {
     95         "json": "obj"
     96     }
     97     $ echo '{ 1.2:3.4}' | python -m json.tool
     98     Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
     99 """
    100 __version__ = '2.0.9'
    101 __all__ = [
    102     'dump', 'dumps', 'load', 'loads',
    103     'JSONDecoder', 'JSONEncoder',
    104 ]
    105 
    106 __author__ = 'Bob Ippolito <bob@redivi.com>'
    107 
    108 from .decoder import JSONDecoder
    109 from .encoder import JSONEncoder
    110 
    111 _default_encoder = JSONEncoder(
    112     skipkeys=False,
    113     ensure_ascii=True,
    114     check_circular=True,
    115     allow_nan=True,
    116     indent=None,
    117     separators=None,
    118     encoding='utf-8',
    119     default=None,
    120 )
    121 
    122 def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
    123         allow_nan=True, cls=None, indent=None, separators=None,
    124         encoding='utf-8', default=None, sort_keys=False, **kw):
    125     """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    126     ``.write()``-supporting file-like object).
    127 
    128     If ``skipkeys`` is true then ``dict`` keys that are not basic types
    129     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    130     will be skipped instead of raising a ``TypeError``.
    131 
    132     If ``ensure_ascii`` is true (the default), all non-ASCII characters in the
    133     output are escaped with ``\uXXXX`` sequences, and the result is a ``str``
    134     instance consisting of ASCII characters only.  If ``ensure_ascii`` is
    135     ``False``, some chunks written to ``fp`` may be ``unicode`` instances.
    136     This usually happens because the input contains unicode strings or the
    137     ``encoding`` parameter is used. Unless ``fp.write()`` explicitly
    138     understands ``unicode`` (as in ``codecs.getwriter``) this is likely to
    139     cause an error.
    140 
    141     If ``check_circular`` is false, then the circular reference check
    142     for container types will be skipped and a circular reference will
    143     result in an ``OverflowError`` (or worse).
    144 
    145     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    146     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
    147     in strict compliance of the JSON specification, instead of using the
    148     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    149 
    150     If ``indent`` is a non-negative integer, then JSON array elements and
    151     object members will be pretty-printed with that indent level. An indent
    152     level of 0 will only insert newlines. ``None`` is the most compact
    153     representation.  Since the default item separator is ``', '``,  the
    154     output might include trailing whitespace when ``indent`` is specified.
    155     You can use ``separators=(',', ': ')`` to avoid this.
    156 
    157     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    158     then it will be used instead of the default ``(', ', ': ')`` separators.
    159     ``(',', ':')`` is the most compact JSON representation.
    160 
    161     ``encoding`` is the character encoding for str instances, default is UTF-8.
    162 
    163     ``default(obj)`` is a function that should return a serializable version
    164     of obj or raise TypeError. The default simply raises TypeError.
    165 
    166     If *sort_keys* is ``True`` (default: ``False``), then the output of
    167     dictionaries will be sorted by key.
    168 
    169     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    170     ``.default()`` method to serialize additional types), specify it with
    171     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
    172 
    173     """
    174     # cached encoder
    175     if (not skipkeys and ensure_ascii and
    176         check_circular and allow_nan and
    177         cls is None and indent is None and separators is None and
    178         encoding == 'utf-8' and default is None and not sort_keys and not kw):
    179         iterable = _default_encoder.iterencode(obj)
    180     else:
    181         if cls is None:
    182             cls = JSONEncoder
    183         iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    184             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    185             separators=separators, encoding=encoding,
    186             default=default, sort_keys=sort_keys, **kw).iterencode(obj)
    187     # could accelerate with writelines in some versions of Python, at
    188     # a debuggability cost
    189     for chunk in iterable:
    190         fp.write(chunk)
    191 
    192 
    193 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
    194         allow_nan=True, cls=None, indent=None, separators=None,
    195         encoding='utf-8', default=None, sort_keys=False, **kw):
    196     """Serialize ``obj`` to a JSON formatted ``str``.
    197 
    198     If ``skipkeys`` is false then ``dict`` keys that are not basic types
    199     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    200     will be skipped instead of raising a ``TypeError``.
    201 
    202     If ``ensure_ascii`` is false, all non-ASCII characters are not escaped, and
    203     the return value may be a ``unicode`` instance. See ``dump`` for details.
    204 
    205     If ``check_circular`` is false, then the circular reference check
    206     for container types will be skipped and a circular reference will
    207     result in an ``OverflowError`` (or worse).
    208 
    209     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    210     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    211     strict compliance of the JSON specification, instead of using the
    212     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    213 
    214     If ``indent`` is a non-negative integer, then JSON array elements and
    215     object members will be pretty-printed with that indent level. An indent
    216     level of 0 will only insert newlines. ``None`` is the most compact
    217     representation.  Since the default item separator is ``', '``,  the
    218     output might include trailing whitespace when ``indent`` is specified.
    219     You can use ``separators=(',', ': ')`` to avoid this.
    220 
    221     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    222     then it will be used instead of the default ``(', ', ': ')`` separators.
    223     ``(',', ':')`` is the most compact JSON representation.
    224 
    225     ``encoding`` is the character encoding for str instances, default is UTF-8.
    226 
    227     ``default(obj)`` is a function that should return a serializable version
    228     of obj or raise TypeError. The default simply raises TypeError.
    229 
    230     If *sort_keys* is ``True`` (default: ``False``), then the output of
    231     dictionaries will be sorted by key.
    232 
    233     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    234     ``.default()`` method to serialize additional types), specify it with
    235     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
    236 
    237     """
    238     # cached encoder
    239     if (not skipkeys and ensure_ascii and
    240         check_circular and allow_nan and
    241         cls is None and indent is None and separators is None and
    242         encoding == 'utf-8' and default is None and not sort_keys and not kw):
    243         return _default_encoder.encode(obj)
    244     if cls is None:
    245         cls = JSONEncoder
    246     return cls(
    247         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    248         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    249         separators=separators, encoding=encoding, default=default,
    250         sort_keys=sort_keys, **kw).encode(obj)
    251 
    252 
    253 _default_decoder = JSONDecoder(encoding=None, object_hook=None,
    254                                object_pairs_hook=None)
    255 
    256 
    257 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
    258         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    259     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    260     a JSON document) to a Python object.
    261 
    262     If the contents of ``fp`` is encoded with an ASCII based encoding other
    263     than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
    264     be specified. Encodings that are not ASCII based (such as UCS-2) are
    265     not allowed, and should be wrapped with
    266     ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
    267     object and passed to ``loads()``
    268 
    269     ``object_hook`` is an optional function that will be called with the
    270     result of any object literal decode (a ``dict``). The return value of
    271     ``object_hook`` will be used instead of the ``dict``. This feature
    272     can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    273 
    274     ``object_pairs_hook`` is an optional function that will be called with the
    275     result of any object literal decoded with an ordered list of pairs.  The
    276     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    277     This feature can be used to implement custom decoders that rely on the
    278     order that the key and value pairs are decoded (for example,
    279     collections.OrderedDict will remember the order of insertion). If
    280     ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
    281 
    282     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    283     kwarg; otherwise ``JSONDecoder`` is used.
    284 
    285     """
    286     return loads(fp.read(),
    287         encoding=encoding, cls=cls, object_hook=object_hook,
    288         parse_float=parse_float, parse_int=parse_int,
    289         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
    290         **kw)
    291 
    292 
    293 def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
    294         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    295     """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
    296     document) to a Python object.
    297 
    298     If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
    299     other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
    300     must be specified. Encodings that are not ASCII based (such as UCS-2)
    301     are not allowed and should be decoded to ``unicode`` first.
    302 
    303     ``object_hook`` is an optional function that will be called with the
    304     result of any object literal decode (a ``dict``). The return value of
    305     ``object_hook`` will be used instead of the ``dict``. This feature
    306     can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    307 
    308     ``object_pairs_hook`` is an optional function that will be called with the
    309     result of any object literal decoded with an ordered list of pairs.  The
    310     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    311     This feature can be used to implement custom decoders that rely on the
    312     order that the key and value pairs are decoded (for example,
    313     collections.OrderedDict will remember the order of insertion). If
    314     ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
    315 
    316     ``parse_float``, if specified, will be called with the string
    317     of every JSON float to be decoded. By default this is equivalent to
    318     float(num_str). This can be used to use another datatype or parser
    319     for JSON floats (e.g. decimal.Decimal).
    320 
    321     ``parse_int``, if specified, will be called with the string
    322     of every JSON int to be decoded. By default this is equivalent to
    323     int(num_str). This can be used to use another datatype or parser
    324     for JSON integers (e.g. float).
    325 
    326     ``parse_constant``, if specified, will be called with one of the
    327     following strings: -Infinity, Infinity, NaN, null, true, false.
    328     This can be used to raise an exception if invalid JSON numbers
    329     are encountered.
    330 
    331     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    332     kwarg; otherwise ``JSONDecoder`` is used.
    333 
    334     """
    335     if (cls is None and encoding is None and object_hook is None and
    336             parse_int is None and parse_float is None and
    337             parse_constant is None and object_pairs_hook is None and not kw):
    338         return _default_decoder.decode(s)
    339     if cls is None:
    340         cls = JSONDecoder
    341     if object_hook is not None:
    342         kw['object_hook'] = object_hook
    343     if object_pairs_hook is not None:
    344         kw['object_pairs_hook'] = object_pairs_hook
    345     if parse_float is not None:
    346         kw['parse_float'] = parse_float
    347     if parse_int is not None:
    348         kw['parse_int'] = parse_int
    349     if parse_constant is not None:
    350         kw['parse_constant'] = parse_constant
    351     return cls(encoding=encoding, **kw).decode(s)
    352