Home | History | Annotate | Download | only in simplejson
      1 r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
      2 JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
      3 interchange format.
      4 
      5 :mod:`simplejson` exposes an API familiar to users of the standard library
      6 :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
      7 version of the :mod:`json` library contained in Python 2.6, but maintains
      8 compatibility with Python 2.4 and Python 2.5 and (currently) has
      9 significant performance advantages, even without using the optional C
     10 extension for speedups.
     11 
     12 Encoding basic Python object hierarchies::
     13 
     14     >>> import simplejson as json
     15     >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
     16     '["foo", {"bar": ["baz", null, 1.0, 2]}]'
     17     >>> print json.dumps("\"foo\bar")
     18     "\"foo\bar"
     19     >>> print json.dumps(u'\u1234')
     20     "\u1234"
     21     >>> print json.dumps('\\')
     22     "\\"
     23     >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
     24     {"a": 0, "b": 0, "c": 0}
     25     >>> from StringIO import StringIO
     26     >>> io = StringIO()
     27     >>> json.dump(['streaming API'], io)
     28     >>> io.getvalue()
     29     '["streaming API"]'
     30 
     31 Compact encoding::
     32 
     33     >>> import simplejson as json
     34     >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
     35     '[1,2,3,{"4":5,"6":7}]'
     36 
     37 Pretty printing::
     38 
     39     >>> import simplejson as json
     40     >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent='    ')
     41     >>> print '\n'.join([l.rstrip() for l in  s.splitlines()])
     42     {
     43         "4": 5,
     44         "6": 7
     45     }
     46 
     47 Decoding JSON::
     48 
     49     >>> import simplejson as json
     50     >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
     51     >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
     52     True
     53     >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
     54     True
     55     >>> from StringIO import StringIO
     56     >>> io = StringIO('["streaming API"]')
     57     >>> json.load(io)[0] == 'streaming API'
     58     True
     59 
     60 Specializing JSON object decoding::
     61 
     62     >>> import simplejson as json
     63     >>> def as_complex(dct):
     64     ...     if '__complex__' in dct:
     65     ...         return complex(dct['real'], dct['imag'])
     66     ...     return dct
     67     ...
     68     >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
     69     ...     object_hook=as_complex)
     70     (1+2j)
     71     >>> from decimal import Decimal
     72     >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
     73     True
     74 
     75 Specializing JSON object encoding::
     76 
     77     >>> import simplejson as json
     78     >>> def encode_complex(obj):
     79     ...     if isinstance(obj, complex):
     80     ...         return [obj.real, obj.imag]
     81     ...     raise TypeError(repr(obj) + " is not JSON serializable")
     82     ...
     83     >>> json.dumps(2 + 1j, default=encode_complex)
     84     '[2.0, 1.0]'
     85     >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
     86     '[2.0, 1.0]'
     87     >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
     88     '[2.0, 1.0]'
     89 
     90 
     91 Using simplejson.tool from the shell to validate and pretty-print::
     92 
     93     $ echo '{"json":"obj"}' | python -m simplejson.tool
     94     {
     95         "json": "obj"
     96     }
     97     $ echo '{ 1.2:3.4}' | python -m simplejson.tool
     98     Expecting property name: line 1 column 2 (char 2)
     99 """
    100 __version__ = '2.6.2'
    101 __all__ = [
    102     'dump', 'dumps', 'load', 'loads',
    103     'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
    104     'OrderedDict', 'simple_first',
    105 ]
    106 
    107 __author__ = 'Bob Ippolito <bob@redivi.com>'
    108 
    109 from decimal import Decimal
    110 
    111 from decoder import JSONDecoder, JSONDecodeError
    112 from encoder import JSONEncoder, JSONEncoderForHTML
    113 def _import_OrderedDict():
    114     import collections
    115     try:
    116         return collections.OrderedDict
    117     except AttributeError:
    118         import ordered_dict
    119         return ordered_dict.OrderedDict
    120 OrderedDict = _import_OrderedDict()
    121 
    122 def _import_c_make_encoder():
    123     try:
    124         from simplejson._speedups import make_encoder
    125         return make_encoder
    126     except ImportError:
    127         return None
    128 
    129 _default_encoder = JSONEncoder(
    130     skipkeys=False,
    131     ensure_ascii=True,
    132     check_circular=True,
    133     allow_nan=True,
    134     indent=None,
    135     separators=None,
    136     encoding='utf-8',
    137     default=None,
    138     use_decimal=True,
    139     namedtuple_as_object=True,
    140     tuple_as_array=True,
    141     bigint_as_string=False,
    142     item_sort_key=None,
    143 )
    144 
    145 def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
    146         allow_nan=True, cls=None, indent=None, separators=None,
    147         encoding='utf-8', default=None, use_decimal=True,
    148         namedtuple_as_object=True, tuple_as_array=True,
    149         bigint_as_string=False, sort_keys=False, item_sort_key=None,
    150         **kw):
    151     """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    152     ``.write()``-supporting file-like object).
    153 
    154     If ``skipkeys`` is true then ``dict`` keys that are not basic types
    155     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    156     will be skipped instead of raising a ``TypeError``.
    157 
    158     If ``ensure_ascii`` is false, then the some chunks written to ``fp``
    159     may be ``unicode`` instances, subject to normal Python ``str`` to
    160     ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
    161     understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
    162     to cause an error.
    163 
    164     If ``check_circular`` is false, then the circular reference check
    165     for container types will be skipped and a circular reference will
    166     result in an ``OverflowError`` (or worse).
    167 
    168     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    169     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
    170     in strict compliance of the JSON specification, instead of using the
    171     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    172 
    173     If *indent* is a string, then JSON array elements and object members
    174     will be pretty-printed with a newline followed by that string repeated
    175     for each level of nesting. ``None`` (the default) selects the most compact
    176     representation without any newlines. For backwards compatibility with
    177     versions of simplejson earlier than 2.1.0, an integer is also accepted
    178     and is converted to a string with that many spaces.
    179 
    180     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    181     then it will be used instead of the default ``(', ', ': ')`` separators.
    182     ``(',', ':')`` is the most compact JSON representation.
    183 
    184     ``encoding`` is the character encoding for str instances, default is UTF-8.
    185 
    186     ``default(obj)`` is a function that should return a serializable version
    187     of obj or raise TypeError. The default simply raises TypeError.
    188 
    189     If *use_decimal* is true (default: ``True``) then decimal.Decimal
    190     will be natively serialized to JSON with full precision.
    191 
    192     If *namedtuple_as_object* is true (default: ``True``),
    193     :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
    194     as JSON objects.
    195 
    196     If *tuple_as_array* is true (default: ``True``),
    197     :class:`tuple` (and subclasses) will be encoded as JSON arrays.
    198 
    199     If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
    200     or lower than -2**53 will be encoded as strings. This is to avoid the
    201     rounding that happens in Javascript otherwise. Note that this is still a
    202     lossy operation that will not round-trip correctly and should be used
    203     sparingly.
    204 
    205     If specified, *item_sort_key* is a callable used to sort the items in
    206     each dictionary. This is useful if you want to sort items other than
    207     in alphabetical order by key. This option takes precedence over
    208     *sort_keys*.
    209 
    210     If *sort_keys* is true (default: ``False``), the output of dictionaries
    211     will be sorted by item.
    212 
    213     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    214     ``.default()`` method to serialize additional types), specify it with
    215     the ``cls`` kwarg.
    216 
    217     """
    218     # cached encoder
    219     if (not skipkeys and ensure_ascii and
    220         check_circular and allow_nan and
    221         cls is None and indent is None and separators is None and
    222         encoding == 'utf-8' and default is None and use_decimal
    223         and namedtuple_as_object and tuple_as_array
    224         and not bigint_as_string and not item_sort_key and not kw):
    225         iterable = _default_encoder.iterencode(obj)
    226     else:
    227         if cls is None:
    228             cls = JSONEncoder
    229         iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    230             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    231             separators=separators, encoding=encoding,
    232             default=default, use_decimal=use_decimal,
    233             namedtuple_as_object=namedtuple_as_object,
    234             tuple_as_array=tuple_as_array,
    235             bigint_as_string=bigint_as_string,
    236             sort_keys=sort_keys,
    237             item_sort_key=item_sort_key,
    238             **kw).iterencode(obj)
    239     # could accelerate with writelines in some versions of Python, at
    240     # a debuggability cost
    241     for chunk in iterable:
    242         fp.write(chunk)
    243 
    244 
    245 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
    246         allow_nan=True, cls=None, indent=None, separators=None,
    247         encoding='utf-8', default=None, use_decimal=True,
    248         namedtuple_as_object=True, tuple_as_array=True,
    249         bigint_as_string=False, sort_keys=False, item_sort_key=None,
    250         **kw):
    251     """Serialize ``obj`` to a JSON formatted ``str``.
    252 
    253     If ``skipkeys`` is false then ``dict`` keys that are not basic types
    254     (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    255     will be skipped instead of raising a ``TypeError``.
    256 
    257     If ``ensure_ascii`` is false, then the return value will be a
    258     ``unicode`` instance subject to normal Python ``str`` to ``unicode``
    259     coercion rules instead of being escaped to an ASCII ``str``.
    260 
    261     If ``check_circular`` is false, then the circular reference check
    262     for container types will be skipped and a circular reference will
    263     result in an ``OverflowError`` (or worse).
    264 
    265     If ``allow_nan`` is false, then it will be a ``ValueError`` to
    266     serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    267     strict compliance of the JSON specification, instead of using the
    268     JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
    269 
    270     If ``indent`` is a string, then JSON array elements and object members
    271     will be pretty-printed with a newline followed by that string repeated
    272     for each level of nesting. ``None`` (the default) selects the most compact
    273     representation without any newlines. For backwards compatibility with
    274     versions of simplejson earlier than 2.1.0, an integer is also accepted
    275     and is converted to a string with that many spaces.
    276 
    277     If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    278     then it will be used instead of the default ``(', ', ': ')`` separators.
    279     ``(',', ':')`` is the most compact JSON representation.
    280 
    281     ``encoding`` is the character encoding for str instances, default is UTF-8.
    282 
    283     ``default(obj)`` is a function that should return a serializable version
    284     of obj or raise TypeError. The default simply raises TypeError.
    285 
    286     If *use_decimal* is true (default: ``True``) then decimal.Decimal
    287     will be natively serialized to JSON with full precision.
    288 
    289     If *namedtuple_as_object* is true (default: ``True``),
    290     :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
    291     as JSON objects.
    292 
    293     If *tuple_as_array* is true (default: ``True``),
    294     :class:`tuple` (and subclasses) will be encoded as JSON arrays.
    295 
    296     If *bigint_as_string* is true (not the default), ints 2**53 and higher
    297     or lower than -2**53 will be encoded as strings. This is to avoid the
    298     rounding that happens in Javascript otherwise.
    299 
    300     If specified, *item_sort_key* is a callable used to sort the items in
    301     each dictionary. This is useful if you want to sort items other than
    302     in alphabetical order by key. This option takes precendence over
    303     *sort_keys*.
    304 
    305     If *sort_keys* is true (default: ``False``), the output of dictionaries
    306     will be sorted by item.
    307 
    308     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    309     ``.default()`` method to serialize additional types), specify it with
    310     the ``cls`` kwarg.
    311 
    312     """
    313     # cached encoder
    314     if (not skipkeys and ensure_ascii and
    315         check_circular and allow_nan and
    316         cls is None and indent is None and separators is None and
    317         encoding == 'utf-8' and default is None and use_decimal
    318         and namedtuple_as_object and tuple_as_array
    319         and not bigint_as_string and not sort_keys
    320         and not item_sort_key and not kw):
    321         return _default_encoder.encode(obj)
    322     if cls is None:
    323         cls = JSONEncoder
    324     return cls(
    325         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    326         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    327         separators=separators, encoding=encoding, default=default,
    328         use_decimal=use_decimal,
    329         namedtuple_as_object=namedtuple_as_object,
    330         tuple_as_array=tuple_as_array,
    331         bigint_as_string=bigint_as_string,
    332         sort_keys=sort_keys,
    333         item_sort_key=item_sort_key,
    334         **kw).encode(obj)
    335 
    336 
    337 _default_decoder = JSONDecoder(encoding=None, object_hook=None,
    338                                object_pairs_hook=None)
    339 
    340 
    341 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
    342         parse_int=None, parse_constant=None, object_pairs_hook=None,
    343         use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
    344         **kw):
    345     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    346     a JSON document) to a Python object.
    347 
    348     *encoding* determines the encoding used to interpret any
    349     :class:`str` objects decoded by this instance (``'utf-8'`` by
    350     default).  It has no effect when decoding :class:`unicode` objects.
    351 
    352     Note that currently only encodings that are a superset of ASCII work,
    353     strings of other encodings should be passed in as :class:`unicode`.
    354 
    355     *object_hook*, if specified, will be called with the result of every
    356     JSON object decoded and its return value will be used in place of the
    357     given :class:`dict`.  This can be used to provide custom
    358     deserializations (e.g. to support JSON-RPC class hinting).
    359 
    360     *object_pairs_hook* is an optional function that will be called with
    361     the result of any object literal decode with an ordered list of pairs.
    362     The return value of *object_pairs_hook* will be used instead of the
    363     :class:`dict`.  This feature can be used to implement custom decoders
    364     that rely on the order that the key and value pairs are decoded (for
    365     example, :func:`collections.OrderedDict` will remember the order of
    366     insertion). If *object_hook* is also defined, the *object_pairs_hook*
    367     takes priority.
    368 
    369     *parse_float*, if specified, will be called with the string of every
    370     JSON float to be decoded.  By default, this is equivalent to
    371     ``float(num_str)``. This can be used to use another datatype or parser
    372     for JSON floats (e.g. :class:`decimal.Decimal`).
    373 
    374     *parse_int*, if specified, will be called with the string of every
    375     JSON int to be decoded.  By default, this is equivalent to
    376     ``int(num_str)``.  This can be used to use another datatype or parser
    377     for JSON integers (e.g. :class:`float`).
    378 
    379     *parse_constant*, if specified, will be called with one of the
    380     following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
    381     can be used to raise an exception if invalid JSON numbers are
    382     encountered.
    383 
    384     If *use_decimal* is true (default: ``False``) then it implies
    385     parse_float=decimal.Decimal for parity with ``dump``.
    386 
    387     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    388     kwarg.
    389 
    390     """
    391     return loads(fp.read(),
    392         encoding=encoding, cls=cls, object_hook=object_hook,
    393         parse_float=parse_float, parse_int=parse_int,
    394         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
    395         use_decimal=use_decimal, **kw)
    396 
    397 
    398 def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
    399         parse_int=None, parse_constant=None, object_pairs_hook=None,
    400         use_decimal=False, **kw):
    401     """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
    402     document) to a Python object.
    403 
    404     *encoding* determines the encoding used to interpret any
    405     :class:`str` objects decoded by this instance (``'utf-8'`` by
    406     default).  It has no effect when decoding :class:`unicode` objects.
    407 
    408     Note that currently only encodings that are a superset of ASCII work,
    409     strings of other encodings should be passed in as :class:`unicode`.
    410 
    411     *object_hook*, if specified, will be called with the result of every
    412     JSON object decoded and its return value will be used in place of the
    413     given :class:`dict`.  This can be used to provide custom
    414     deserializations (e.g. to support JSON-RPC class hinting).
    415 
    416     *object_pairs_hook* is an optional function that will be called with
    417     the result of any object literal decode with an ordered list of pairs.
    418     The return value of *object_pairs_hook* will be used instead of the
    419     :class:`dict`.  This feature can be used to implement custom decoders
    420     that rely on the order that the key and value pairs are decoded (for
    421     example, :func:`collections.OrderedDict` will remember the order of
    422     insertion). If *object_hook* is also defined, the *object_pairs_hook*
    423     takes priority.
    424 
    425     *parse_float*, if specified, will be called with the string of every
    426     JSON float to be decoded.  By default, this is equivalent to
    427     ``float(num_str)``. This can be used to use another datatype or parser
    428     for JSON floats (e.g. :class:`decimal.Decimal`).
    429 
    430     *parse_int*, if specified, will be called with the string of every
    431     JSON int to be decoded.  By default, this is equivalent to
    432     ``int(num_str)``.  This can be used to use another datatype or parser
    433     for JSON integers (e.g. :class:`float`).
    434 
    435     *parse_constant*, if specified, will be called with one of the
    436     following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
    437     can be used to raise an exception if invalid JSON numbers are
    438     encountered.
    439 
    440     If *use_decimal* is true (default: ``False``) then it implies
    441     parse_float=decimal.Decimal for parity with ``dump``.
    442 
    443     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    444     kwarg.
    445 
    446     """
    447     if (cls is None and encoding is None and object_hook is None and
    448             parse_int is None and parse_float is None and
    449             parse_constant is None and object_pairs_hook is None
    450             and not use_decimal and not kw):
    451         return _default_decoder.decode(s)
    452     if cls is None:
    453         cls = JSONDecoder
    454     if object_hook is not None:
    455         kw['object_hook'] = object_hook
    456     if object_pairs_hook is not None:
    457         kw['object_pairs_hook'] = object_pairs_hook
    458     if parse_float is not None:
    459         kw['parse_float'] = parse_float
    460     if parse_int is not None:
    461         kw['parse_int'] = parse_int
    462     if parse_constant is not None:
    463         kw['parse_constant'] = parse_constant
    464     if use_decimal:
    465         if parse_float is not None:
    466             raise TypeError("use_decimal=True implies parse_float=Decimal")
    467         kw['parse_float'] = Decimal
    468     return cls(encoding=encoding, **kw).decode(s)
    469 
    470 
    471 def _toggle_speedups(enabled):
    472     import simplejson.decoder as dec
    473     import simplejson.encoder as enc
    474     import simplejson.scanner as scan
    475     c_make_encoder = _import_c_make_encoder()
    476     if enabled:
    477         dec.scanstring = dec.c_scanstring or dec.py_scanstring
    478         enc.c_make_encoder = c_make_encoder
    479         enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
    480             enc.py_encode_basestring_ascii)
    481         scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
    482     else:
    483         dec.scanstring = dec.py_scanstring
    484         enc.c_make_encoder = None
    485         enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
    486         scan.make_scanner = scan.py_make_scanner
    487     dec.make_scanner = scan.make_scanner
    488     global _default_decoder
    489     _default_decoder = JSONDecoder(
    490         encoding=None,
    491         object_hook=None,
    492         object_pairs_hook=None,
    493     )
    494     global _default_encoder
    495     _default_encoder = JSONEncoder(
    496        skipkeys=False,
    497        ensure_ascii=True,
    498        check_circular=True,
    499        allow_nan=True,
    500        indent=None,
    501        separators=None,
    502        encoding='utf-8',
    503        default=None,
    504    )
    505 
    506 def simple_first(kv):
    507     """Helper function to pass to item_sort_key to sort simple
    508     elements to the top, then container elements.
    509     """
    510     return (isinstance(kv[1], (list, dict, tuple)), kv[0])
    511