Home | History | Annotate | Download | only in json
      1 r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
      2 JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
      3 interchange format.
      4 
      5 :mod:`json` exposes an API familiar to users of the standard library
      6 :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
      7 version of the :mod:`json` library contained in Python 2.6, but maintains
      8 compatibility with Python 2.4 and Python 2.5 and (currently) has
      9 significant performance advantages, even without using the optional C
     10 extension for speedups.
     11 
     12 Encoding basic Python object hierarchies::
     13 
     14     >>> import json
     15     >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
     16     '["foo", {"bar": ["baz", null, 1.0, 2]}]'
     17     >>> print json.dumps("\"foo\bar")
     18     "\"foo\bar"
     19     >>> print json.dumps(u'\u1234')
     20     "\u1234"
     21     >>> print json.dumps('\\')
     22     "\\"
     23     >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
     24     {"a": 0, "b": 0, "c": 0}
     25     >>> from StringIO import StringIO
     26     >>> io = StringIO()
     27     >>> json.dump(['streaming API'], io)
     28     >>> io.getvalue()
     29     '["streaming API"]'
     30 
     31 Compact encoding::
     32 
     33     >>> import json
     34     >>> json.dumps([1,2,3,{'4': 5, '6': 7}], sort_keys=True, separators=(',',':'))
     35     '[1,2,3,{"4":5,"6":7}]'
     36 
     37 Pretty printing::
     38 
     39     >>> import json
     40     >>> print json.dumps({'4': 5, '6': 7}, sort_keys=True,
     41     ...                  indent=4, separators=(',', ': '))
     42     {
     43         "4": 5,
     44         "6": 7
     45     }
     46 
     47 Decoding JSON::
     48 
     49     >>> import json
     50     >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
     51     >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
     52     True
     53     >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
     54     True
     55     >>> from StringIO import StringIO
     56     >>> io = StringIO('["streaming API"]')
     57     >>> json.load(io)[0] == 'streaming API'
     58     True
     59 
     60 Specializing JSON object decoding::
     61 
     62     >>> import json
     63     >>> def as_complex(dct):
     64     ...     if '__complex__' in dct:
     65     ...         return complex(dct['real'], dct['imag'])
     66     ...     return dct
     67     ...
     68     >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
     69     ...     object_hook=as_complex)
     70     (1+2j)
     71     >>> from decimal import Decimal
     72     >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
     73     True
     74 
     75 Specializing JSON object encoding::
     76 
     77     >>> import json
     78     >>> def encode_complex(obj):
     79     ...     if isinstance(obj, complex):
     80     ...         return [obj.real, obj.imag]
     81     ...     raise TypeError(repr(obj) + " is not JSON serializable")
     82     ...
     83     >>> json.dumps(2 + 1j, default=encode_complex)
     84     '[2.0, 1.0]'
     85     >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
     86     '[2.0, 1.0]'
     87     >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
     88     '[2.0, 1.0]'
     89 
     90 
     91 Using json.tool from the shell to validate and pretty-print::
     92 
     93     $ echo '{"json":"obj"}' | python -m json.tool
     94     {
     95         "json": "obj"
     96     }
     97     $ echo '{ 1.2:3.4}' | python -m json.tool
     98     Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
     99 """
    100 __version__ = '2.0.9'
    101 __all__ = [
    102     'dump', 'dumps', 'load', 'loads',
    103     'JSONDecoder', 'JSONEncoder',
    104 ]
    105 
    106 __author__ = 'Bob Ippolito <bob@redivi.com>'
    107 
    108 from .decoder import JSONDecoder
    109 from .encoder import JSONEncoder
    110 
    111 _default_encoder = JSONEncoder(
    112     skipkeys=False,
    113     ensure_ascii=True,
    114     check_circular=True,
    115     allow_nan=True,
    116     indent=None,
    117     separators=None,
    118     encoding='utf-8',
    119     default=None,
    120 )
    121 
    122 def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
    123         allow_nan=True, cls=None, indent=None, separators=None,
    124         encoding='utf-8', default=None, sort_keys=False, **kw):
    125     """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    126     ``.write()``-supporting file-like object).
    127 
    128     If ``skipkeys`` is true then ``dict`` keys that are not basic types
    129     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    130     will be skipped instead of raising a ``TypeError``.
    131 
    132     If ``ensure_ascii`` is true (the default), all non-ASCII characters in the
    133     output are escaped with ``\uXXXX`` sequences, and the result is a ``str``
    134     instance consisting of ASCII characters only.  If ``ensure_ascii`` is
    135     false, some chunks written to ``fp`` may be ``unicode`` instances.
    136     This usually happens because the input contains unicode strings or the
    137     ``encoding`` parameter is used. Unless ``fp.write()`` explicitly
    138     understands ``unicode`` (as in ``codecs.getwriter``) this is likely to
    139     cause an error.
    140 
    141     If ``check_circular`` is false, then the circular reference check
    142     for container types will be skipped and a circular reference will
    143     result in an ``OverflowError`` (or worse).
    144 
    145     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    146     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
    147     in strict compliance of the JSON specification, instead of using the
    148     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    149 
    150     If ``indent`` is a non-negative integer, then JSON array elements and
    151     object members will be pretty-printed with that indent level. An indent
    152     level of 0 will only insert newlines. ``None`` is the most compact
    153     representation.  Since the default item separator is ``', '``,  the
    154     output might include trailing whitespace when ``indent`` is specified.
    155     You can use ``separators=(',', ': ')`` to avoid this.
    156 
    157     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    158     then it will be used instead of the default ``(', ', ': ')`` separators.
    159     ``(',', ':')`` is the most compact JSON representation.
    160 
    161     ``encoding`` is the character encoding for str instances, default is UTF-8.
    162 
    163     ``default(obj)`` is a function that should return a serializable version
    164     of obj or raise TypeError. The default simply raises TypeError.
    165 
    166     If *sort_keys* is true (default: ``False``), then the output of
    167     dictionaries will be sorted by key.
    168 
    169     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    170     ``.default()`` method to serialize additional types), specify it with
    171     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
    172 
    173     """
    174     # cached encoder
    175     if (not skipkeys and ensure_ascii and
    176         check_circular and allow_nan and
    177         cls is None and indent is None and separators is None and
    178         encoding == 'utf-8' and default is None and not sort_keys and not kw):
    179         iterable = _default_encoder.iterencode(obj)
    180     else:
    181         if cls is None:
    182             cls = JSONEncoder
    183         iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    184             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    185             separators=separators, encoding=encoding,
    186             default=default, sort_keys=sort_keys, **kw).iterencode(obj)
    187     # could accelerate with writelines in some versions of Python, at
    188     # a debuggability cost
    189     for chunk in iterable:
    190         fp.write(chunk)
    191 
    192 
    193 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
    194         allow_nan=True, cls=None, indent=None, separators=None,
    195         encoding='utf-8', default=None, sort_keys=False, **kw):
    196     """Serialize ``obj`` to a JSON formatted ``str``.
    197 
    198     If ``skipkeys`` is true then ``dict`` keys that are not basic types
    199     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    200     will be skipped instead of raising a ``TypeError``.
    201 
    202 
    203     If ``ensure_ascii`` is false, all non-ASCII characters are not escaped, and
    204     the return value may be a ``unicode`` instance. See ``dump`` for details.
    205 
    206     If ``check_circular`` is false, then the circular reference check
    207     for container types will be skipped and a circular reference will
    208     result in an ``OverflowError`` (or worse).
    209 
    210     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    211     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    212     strict compliance of the JSON specification, instead of using the
    213     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    214 
    215     If ``indent`` is a non-negative integer, then JSON array elements and
    216     object members will be pretty-printed with that indent level. An indent
    217     level of 0 will only insert newlines. ``None`` is the most compact
    218     representation.  Since the default item separator is ``', '``,  the
    219     output might include trailing whitespace when ``indent`` is specified.
    220     You can use ``separators=(',', ': ')`` to avoid this.
    221 
    222     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    223     then it will be used instead of the default ``(', ', ': ')`` separators.
    224     ``(',', ':')`` is the most compact JSON representation.
    225 
    226     ``encoding`` is the character encoding for str instances, default is UTF-8.
    227 
    228     ``default(obj)`` is a function that should return a serializable version
    229     of obj or raise TypeError. The default simply raises TypeError.
    230 
    231     If *sort_keys* is true (default: ``False``), then the output of
    232     dictionaries will be sorted by key.
    233 
    234     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    235     ``.default()`` method to serialize additional types), specify it with
    236     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
    237 
    238     """
    239     # cached encoder
    240     if (not skipkeys and ensure_ascii and
    241         check_circular and allow_nan and
    242         cls is None and indent is None and separators is None and
    243         encoding == 'utf-8' and default is None and not sort_keys and not kw):
    244         return _default_encoder.encode(obj)
    245     if cls is None:
    246         cls = JSONEncoder
    247     return cls(
    248         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    249         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    250         separators=separators, encoding=encoding, default=default,
    251         sort_keys=sort_keys, **kw).encode(obj)
    252 
    253 
    254 _default_decoder = JSONDecoder(encoding=None, object_hook=None,
    255                                object_pairs_hook=None)
    256 
    257 
    258 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
    259         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    260     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    261     a JSON document) to a Python object.
    262 
    263     If the contents of ``fp`` is encoded with an ASCII based encoding other
    264     than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
    265     be specified. Encodings that are not ASCII based (such as UCS-2) are
    266     not allowed, and should be wrapped with
    267     ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
    268     object and passed to ``loads()``
    269 
    270     ``object_hook`` is an optional function that will be called with the
    271     result of any object literal decode (a ``dict``). The return value of
    272     ``object_hook`` will be used instead of the ``dict``. This feature
    273     can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    274 
    275     ``object_pairs_hook`` is an optional function that will be called with the
    276     result of any object literal decoded with an ordered list of pairs.  The
    277     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    278     This feature can be used to implement custom decoders that rely on the
    279     order that the key and value pairs are decoded (for example,
    280     collections.OrderedDict will remember the order of insertion). If
    281     ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
    282 
    283     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    284     kwarg; otherwise ``JSONDecoder`` is used.
    285 
    286     """
    287     return loads(fp.read(),
    288         encoding=encoding, cls=cls, object_hook=object_hook,
    289         parse_float=parse_float, parse_int=parse_int,
    290         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
    291         **kw)
    292 
    293 
    294 def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
    295         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    296     """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
    297     document) to a Python object.
    298 
    299     If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
    300     other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
    301     must be specified. Encodings that are not ASCII based (such as UCS-2)
    302     are not allowed and should be decoded to ``unicode`` first.
    303 
    304     ``object_hook`` is an optional function that will be called with the
    305     result of any object literal decode (a ``dict``). The return value of
    306     ``object_hook`` will be used instead of the ``dict``. This feature
    307     can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    308 
    309     ``object_pairs_hook`` is an optional function that will be called with the
    310     result of any object literal decoded with an ordered list of pairs.  The
    311     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    312     This feature can be used to implement custom decoders that rely on the
    313     order that the key and value pairs are decoded (for example,
    314     collections.OrderedDict will remember the order of insertion). If
    315     ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
    316 
    317     ``parse_float``, if specified, will be called with the string
    318     of every JSON float to be decoded. By default this is equivalent to
    319     float(num_str). This can be used to use another datatype or parser
    320     for JSON floats (e.g. decimal.Decimal).
    321 
    322     ``parse_int``, if specified, will be called with the string
    323     of every JSON int to be decoded. By default this is equivalent to
    324     int(num_str). This can be used to use another datatype or parser
    325     for JSON integers (e.g. float).
    326 
    327     ``parse_constant``, if specified, will be called with one of the
    328     following strings: -Infinity, Infinity, NaN.
    329     This can be used to raise an exception if invalid JSON numbers
    330     are encountered.
    331 
    332     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    333     kwarg; otherwise ``JSONDecoder`` is used.
    334 
    335     """
    336     if (cls is None and encoding is None and object_hook is None and
    337             parse_int is None and parse_float is None and
    338             parse_constant is None and object_pairs_hook is None and not kw):
    339         return _default_decoder.decode(s)
    340     if cls is None:
    341         cls = JSONDecoder
    342     if object_hook is not None:
    343         kw['object_hook'] = object_hook
    344     if object_pairs_hook is not None:
    345         kw['object_pairs_hook'] = object_pairs_hook
    346     if parse_float is not None:
    347         kw['parse_float'] = parse_float
    348     if parse_int is not None:
    349         kw['parse_int'] = parse_int
    350     if parse_constant is not None:
    351         kw['parse_constant'] = parse_constant
    352     return cls(encoding=encoding, **kw).decode(s)
    353