Home | History | Annotate | Download | only in json
      1 r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
      2 JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
      3 interchange format.
      4 
      5 :mod:`json` exposes an API familiar to users of the standard library
      6 :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
      7 version of the :mod:`json` library contained in Python 2.6, but maintains
      8 compatibility with Python 2.4 and Python 2.5 and (currently) has
      9 significant performance advantages, even without using the optional C
     10 extension for speedups.
     11 
     12 Encoding basic Python object hierarchies::
     13 
     14     >>> import json
     15     >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
     16     '["foo", {"bar": ["baz", null, 1.0, 2]}]'
     17     >>> print json.dumps("\"foo\bar")
     18     "\"foo\bar"
     19     >>> print json.dumps(u'\u1234')
     20     "\u1234"
     21     >>> print json.dumps('\\')
     22     "\\"
     23     >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
     24     {"a": 0, "b": 0, "c": 0}
     25     >>> from StringIO import StringIO
     26     >>> io = StringIO()
     27     >>> json.dump(['streaming API'], io)
     28     >>> io.getvalue()
     29     '["streaming API"]'
     30 
     31 Compact encoding::
     32 
     33     >>> import json
     34     >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
     35     '[1,2,3,{"4":5,"6":7}]'
     36 
     37 Pretty printing::
     38 
     39     >>> import json
     40     >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
     41     >>> print '\n'.join([l.rstrip() for l in  s.splitlines()])
     42     {
     43         "4": 5,
     44         "6": 7
     45     }
     46 
     47 Decoding JSON::
     48 
     49     >>> import json
     50     >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
     51     >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
     52     True
     53     >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
     54     True
     55     >>> from StringIO import StringIO
     56     >>> io = StringIO('["streaming API"]')
     57     >>> json.load(io)[0] == 'streaming API'
     58     True
     59 
     60 Specializing JSON object decoding::
     61 
     62     >>> import json
     63     >>> def as_complex(dct):
     64     ...     if '__complex__' in dct:
     65     ...         return complex(dct['real'], dct['imag'])
     66     ...     return dct
     67     ...
     68     >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
     69     ...     object_hook=as_complex)
     70     (1+2j)
     71     >>> from decimal import Decimal
     72     >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
     73     True
     74 
     75 Specializing JSON object encoding::
     76 
     77     >>> import json
     78     >>> def encode_complex(obj):
     79     ...     if isinstance(obj, complex):
     80     ...         return [obj.real, obj.imag]
     81     ...     raise TypeError(repr(obj) + " is not JSON serializable")
     82     ...
     83     >>> json.dumps(2 + 1j, default=encode_complex)
     84     '[2.0, 1.0]'
     85     >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
     86     '[2.0, 1.0]'
     87     >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
     88     '[2.0, 1.0]'
     89 
     90 
     91 Using json.tool from the shell to validate and pretty-print::
     92 
     93     $ echo '{"json":"obj"}' | python -m json.tool
     94     {
     95         "json": "obj"
     96     }
     97     $ echo '{ 1.2:3.4}' | python -m json.tool
     98     Expecting property name: line 1 column 2 (char 2)
     99 """
    100 __version__ = '2.0.9'
    101 __all__ = [
    102     'dump', 'dumps', 'load', 'loads',
    103     'JSONDecoder', 'JSONEncoder',
    104 ]
    105 
    106 __author__ = 'Bob Ippolito <bob@redivi.com>'
    107 
    108 from .decoder import JSONDecoder
    109 from .encoder import JSONEncoder
    110 
    111 _default_encoder = JSONEncoder(
    112     skipkeys=False,
    113     ensure_ascii=True,
    114     check_circular=True,
    115     allow_nan=True,
    116     indent=None,
    117     separators=None,
    118     encoding='utf-8',
    119     default=None,
    120 )
    121 
    122 def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
    123         allow_nan=True, cls=None, indent=None, separators=None,
    124         encoding='utf-8', default=None, **kw):
    125     """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    126     ``.write()``-supporting file-like object).
    127 
    128     If ``skipkeys`` is true then ``dict`` keys that are not basic types
    129     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    130     will be skipped instead of raising a ``TypeError``.
    131 
    132     If ``ensure_ascii`` is false, then the some chunks written to ``fp``
    133     may be ``unicode`` instances, subject to normal Python ``str`` to
    134     ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
    135     understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
    136     to cause an error.
    137 
    138     If ``check_circular`` is false, then the circular reference check
    139     for container types will be skipped and a circular reference will
    140     result in an ``OverflowError`` (or worse).
    141 
    142     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    143     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
    144     in strict compliance of the JSON specification, instead of using the
    145     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    146 
    147     If ``indent`` is a non-negative integer, then JSON array elements and
    148     object members will be pretty-printed with that indent level. An indent
    149     level of 0 will only insert newlines. ``None`` is the most compact
    150     representation.
    151 
    152     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    153     then it will be used instead of the default ``(', ', ': ')`` separators.
    154     ``(',', ':')`` is the most compact JSON representation.
    155 
    156     ``encoding`` is the character encoding for str instances, default is UTF-8.
    157 
    158     ``default(obj)`` is a function that should return a serializable version
    159     of obj or raise TypeError. The default simply raises TypeError.
    160 
    161     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    162     ``.default()`` method to serialize additional types), specify it with
    163     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
    164 
    165     """
    166     # cached encoder
    167     if (not skipkeys and ensure_ascii and
    168         check_circular and allow_nan and
    169         cls is None and indent is None and separators is None and
    170         encoding == 'utf-8' and default is None and not kw):
    171         iterable = _default_encoder.iterencode(obj)
    172     else:
    173         if cls is None:
    174             cls = JSONEncoder
    175         iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    176             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    177             separators=separators, encoding=encoding,
    178             default=default, **kw).iterencode(obj)
    179     # could accelerate with writelines in some versions of Python, at
    180     # a debuggability cost
    181     for chunk in iterable:
    182         fp.write(chunk)
    183 
    184 
    185 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
    186         allow_nan=True, cls=None, indent=None, separators=None,
    187         encoding='utf-8', default=None, **kw):
    188     """Serialize ``obj`` to a JSON formatted ``str``.
    189 
    190     If ``skipkeys`` is false then ``dict`` keys that are not basic types
    191     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    192     will be skipped instead of raising a ``TypeError``.
    193 
    194     If ``ensure_ascii`` is false, then the return value will be a
    195     ``unicode`` instance subject to normal Python ``str`` to ``unicode``
    196     coercion rules instead of being escaped to an ASCII ``str``.
    197 
    198     If ``check_circular`` is false, then the circular reference check
    199     for container types will be skipped and a circular reference will
    200     result in an ``OverflowError`` (or worse).
    201 
    202     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    203     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    204     strict compliance of the JSON specification, instead of using the
    205     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    206 
    207     If ``indent`` is a non-negative integer, then JSON array elements and
    208     object members will be pretty-printed with that indent level. An indent
    209     level of 0 will only insert newlines. ``None`` is the most compact
    210     representation.
    211 
    212     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    213     then it will be used instead of the default ``(', ', ': ')`` separators.
    214     ``(',', ':')`` is the most compact JSON representation.
    215 
    216     ``encoding`` is the character encoding for str instances, default is UTF-8.
    217 
    218     ``default(obj)`` is a function that should return a serializable version
    219     of obj or raise TypeError. The default simply raises TypeError.
    220 
    221     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    222     ``.default()`` method to serialize additional types), specify it with
    223     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
    224 
    225     """
    226     # cached encoder
    227     if (not skipkeys and ensure_ascii and
    228         check_circular and allow_nan and
    229         cls is None and indent is None and separators is None and
    230         encoding == 'utf-8' and default is None and not kw):
    231         return _default_encoder.encode(obj)
    232     if cls is None:
    233         cls = JSONEncoder
    234     return cls(
    235         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    236         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    237         separators=separators, encoding=encoding, default=default,
    238         **kw).encode(obj)
    239 
    240 
    241 _default_decoder = JSONDecoder(encoding=None, object_hook=None,
    242                                object_pairs_hook=None)
    243 
    244 
    245 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
    246         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    247     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    248     a JSON document) to a Python object.
    249 
    250     If the contents of ``fp`` is encoded with an ASCII based encoding other
    251     than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
    252     be specified. Encodings that are not ASCII based (such as UCS-2) are
    253     not allowed, and should be wrapped with
    254     ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
    255     object and passed to ``loads()``
    256 
    257     ``object_hook`` is an optional function that will be called with the
    258     result of any object literal decode (a ``dict``). The return value of
    259     ``object_hook`` will be used instead of the ``dict``. This feature
    260     can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    261 
    262     ``object_pairs_hook`` is an optional function that will be called with the
    263     result of any object literal decoded with an ordered list of pairs.  The
    264     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    265     This feature can be used to implement custom decoders that rely on the
    266     order that the key and value pairs are decoded (for example,
    267     collections.OrderedDict will remember the order of insertion). If
    268     ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
    269 
    270     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    271     kwarg; otherwise ``JSONDecoder`` is used.
    272 
    273     """
    274     return loads(fp.read(),
    275         encoding=encoding, cls=cls, object_hook=object_hook,
    276         parse_float=parse_float, parse_int=parse_int,
    277         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
    278         **kw)
    279 
    280 
    281 def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
    282         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    283     """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
    284     document) to a Python object.
    285 
    286     If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
    287     other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
    288     must be specified. Encodings that are not ASCII based (such as UCS-2)
    289     are not allowed and should be decoded to ``unicode`` first.
    290 
    291     ``object_hook`` is an optional function that will be called with the
    292     result of any object literal decode (a ``dict``). The return value of
    293     ``object_hook`` will be used instead of the ``dict``. This feature
    294     can be used to implement custom decoders (e.g. JSON-RPC class hinting).
    295 
    296     ``object_pairs_hook`` is an optional function that will be called with the
    297     result of any object literal decoded with an ordered list of pairs.  The
    298     return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    299     This feature can be used to implement custom decoders that rely on the
    300     order that the key and value pairs are decoded (for example,
    301     collections.OrderedDict will remember the order of insertion). If
    302     ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
    303 
    304     ``parse_float``, if specified, will be called with the string
    305     of every JSON float to be decoded. By default this is equivalent to
    306     float(num_str). This can be used to use another datatype or parser
    307     for JSON floats (e.g. decimal.Decimal).
    308 
    309     ``parse_int``, if specified, will be called with the string
    310     of every JSON int to be decoded. By default this is equivalent to
    311     int(num_str). This can be used to use another datatype or parser
    312     for JSON integers (e.g. float).
    313 
    314     ``parse_constant``, if specified, will be called with one of the
    315     following strings: -Infinity, Infinity, NaN, null, true, false.
    316     This can be used to raise an exception if invalid JSON numbers
    317     are encountered.
    318 
    319     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    320     kwarg; otherwise ``JSONDecoder`` is used.
    321 
    322     """
    323     if (cls is None and encoding is None and object_hook is None and
    324             parse_int is None and parse_float is None and
    325             parse_constant is None and object_pairs_hook is None and not kw):
    326         return _default_decoder.decode(s)
    327     if cls is None:
    328         cls = JSONDecoder
    329     if object_hook is not None:
    330         kw['object_hook'] = object_hook
    331     if object_pairs_hook is not None:
    332         kw['object_pairs_hook'] = object_pairs_hook
    333     if parse_float is not None:
    334         kw['parse_float'] = parse_float
    335     if parse_int is not None:
    336         kw['parse_int'] = parse_int
    337     if parse_constant is not None:
    338         kw['parse_constant'] = parse_constant
    339     return cls(encoding=encoding, **kw).decode(s)
    340