# Home | History | Annotate | Download | only in simplejson
      1 """Implementation of JSONEncoder
      2 """
      3 import re
      4 from decimal import Decimal
      5 
      6 def _import_speedups():
      7     try:
      8         from simplejson import _speedups
      9         return _speedups.encode_basestring_ascii, _speedups.make_encoder
     10     except ImportError:
     11         return None, None
     12 c_encode_basestring_ascii, c_make_encoder = _import_speedups()
     13 
     14 from simplejson.decoder import PosInf
     15 
# Characters replaced by encode_basestring(): control characters
# U+0000-U+001F, backslash, double quote, and U+2028 / U+2029.
ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# Characters replaced by py_encode_basestring_ascii(): backslash, double
# quote, and anything outside the range space (0x20) .. tilde (0x7e).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the range 0x80-0xff; the string encoders use it to
# decide whether a ``str`` argument should be decoded as UTF-8 first.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a single character to its JSON escape sequence.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    u'\u2028': '\\u2028',
    u'\u2029': '\\u2029',
}
# Fill in \u00XX escapes for the remaining control characters; setdefault()
# keeps the short forms (\b, \f, \n, \r, \t) already listed above.
for i in range(0x20):
    # Equivalent str.format() spelling, left disabled (presumably so the
    # module also runs on interpreters without str.format -- confirm):
    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Callable used to render ordinary (non-NaN, non-infinite) floats; it is the
# default ``_repr`` of the ``floatstr`` helper in JSONEncoder.iterencode().
FLOAT_REPR = repr
     35 
     36 def encode_basestring(s):
     37     """Return a JSON representation of a Python string
     38 
     39     """
     40     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
     41         s = s.decode('utf-8')
     42     def replace(match):
     43         return ESCAPE_DCT[match.group(0)]
     44     return u'"' + ESCAPE.sub(replace, s) + u'"'
     45 
     46 
     47 def py_encode_basestring_ascii(s):
     48     """Return an ASCII-only JSON representation of a Python string
     49 
     50     """
     51     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
     52         s = s.decode('utf-8')
     53     def replace(match):
     54         s = match.group(0)
     55         try:
     56             return ESCAPE_DCT[s]
     57         except KeyError:
     58             n = ord(s)
     59             if n < 0x10000:
     60                 #return '\\u{0:04x}'.format(n)
     61                 return '\\u%04x' % (n,)
     62             else:
     63                 # surrogate pair
     64                 n -= 0x10000
     65                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
     66                 s2 = 0xdc00 | (n & 0x3ff)
     67                 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
     68                 return '\\u%04x\\u%04x' % (s1, s2)
     69     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
     70 
     71 
# Public ASCII string encoder: the C implementation when it was importable
# (c_encode_basestring_ascii is None otherwise), else the pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
     74 
     75 class JSONEncoder(object):
     76     """Extensible JSON <http://json.org> encoder for Python data structures.
     77 
     78     Supports the following objects and types by default:
     79 
     80     +-------------------+---------------+
     81     | Python            | JSON          |
     82     +===================+===============+
     83     | dict, namedtuple  | object        |
     84     +-------------------+---------------+
     85     | list, tuple       | array         |
     86     +-------------------+---------------+
     87     | str, unicode      | string        |
     88     +-------------------+---------------+
     89     | int, long, float  | number        |
     90     +-------------------+---------------+
     91     | True              | true          |
     92     +-------------------+---------------+
     93     | False             | false         |
     94     +-------------------+---------------+
     95     | None              | null          |
     96     +-------------------+---------------+
     97 
     98     To extend this to recognize other objects, subclass and implement a
     99     ``.default()`` method with another method that returns a serializable
    100     object for ``o`` if possible, otherwise it should call the superclass
    101     implementation (to raise ``TypeError``).
    102 
    103     """
    104     item_separator = ', '
    105     key_separator = ': '
    106     def __init__(self, skipkeys=False, ensure_ascii=True,
    107             check_circular=True, allow_nan=True, sort_keys=False,
    108             indent=None, separators=None, encoding='utf-8', default=None,
    109             use_decimal=True, namedtuple_as_object=True,
    110             tuple_as_array=True, bigint_as_string=False,
    111             item_sort_key=None):
    112         """Constructor for JSONEncoder, with sensible defaults.
    113 
    114         If skipkeys is false, then it is a TypeError to attempt
    115         encoding of keys that are not str, int, long, float or None.  If
    116         skipkeys is True, such items are simply skipped.
    117 
    118         If ensure_ascii is true, the output is guaranteed to be str
    119         objects with all incoming unicode characters escaped.  If
    120         ensure_ascii is false, the output will be unicode object.
    121 
    122         If check_circular is true, then lists, dicts, and custom encoded
    123         objects will be checked for circular references during encoding to
    124         prevent an infinite recursion (which would cause an OverflowError).
    125         Otherwise, no such check takes place.
    126 
    127         If allow_nan is true, then NaN, Infinity, and -Infinity will be
    128         encoded as such.  This behavior is not JSON specification compliant,
    129         but is consistent with most JavaScript based encoders and decoders.
    130         Otherwise, it will be a ValueError to encode such floats.
    131 
    132         If sort_keys is true, then the output of dictionaries will be
    133         sorted by key; this is useful for regression tests to ensure
    134         that JSON serializations can be compared on a day-to-day basis.
    135 
    136         If indent is a string, then JSON array elements and object members
    137         will be pretty-printed with a newline followed by that string repeated
    138         for each level of nesting. ``None`` (the default) selects the most compact
    139         representation without any newlines. For backwards compatibility with
    140         versions of simplejson earlier than 2.1.0, an integer is also accepted
    141         and is converted to a string with that many spaces.
    142 
    143         If specified, separators should be a (item_separator, key_separator)
    144         tuple.  The default is (', ', ': ').  To get the most compact JSON
    145         representation you should specify (',', ':') to eliminate whitespace.
    146 
    147         If specified, default is a function that gets called for objects
    148         that can't otherwise be serialized.  It should return a JSON encodable
    149         version of the object or raise a ``TypeError``.
    150 
    151         If encoding is not None, then all input strings will be
    152         transformed into unicode using that encoding prior to JSON-encoding.
    153         The default is UTF-8.
    154 
    155         If use_decimal is true (not the default), ``decimal.Decimal`` will
    156         be supported directly by the encoder. For the inverse, decode JSON
    157         with ``parse_float=decimal.Decimal``.
    158 
    159         If namedtuple_as_object is true (the default), objects with
    160         ``_asdict()`` methods will be encoded as JSON objects.
    161 
    162         If tuple_as_array is true (the default), tuple (and subclasses) will
    163         be encoded as JSON arrays.
    164 
    165         If bigint_as_string is true (not the default), ints 2**53 and higher
    166         or lower than -2**53 will be encoded as strings. This is to avoid the
    167         rounding that happens in Javascript otherwise.
    168 
    169         If specified, item_sort_key is a callable used to sort the items in
    170         each dictionary. This is useful if you want to sort items other than
    171         in alphabetical order by key.
    172         """
    173 
    174         self.skipkeys = skipkeys
    175         self.ensure_ascii = ensure_ascii
    176         self.check_circular = check_circular
    177         self.allow_nan = allow_nan
    178         self.sort_keys = sort_keys
    179         self.use_decimal = use_decimal
    180         self.namedtuple_as_object = namedtuple_as_object
    181         self.tuple_as_array = tuple_as_array
    182         self.bigint_as_string = bigint_as_string
    183         self.item_sort_key = item_sort_key
    184         if indent is not None and not isinstance(indent, basestring):
    185             indent = indent * ' '
    186         self.indent = indent
    187         if separators is not None:
    188             self.item_separator, self.key_separator = separators
    189         elif indent is not None:
    190             self.item_separator = ','
    191         if default is not None:
    192             self.default = default
    193         self.encoding = encoding
    194 
    195     def default(self, o):
    196         """Implement this method in a subclass such that it returns
    197         a serializable object for ``o``, or calls the base implementation
    198         (to raise a ``TypeError``).
    199 
    200         For example, to support arbitrary iterators, you could
    201         implement default like this::
    202 
    203             def default(self, o):
    204                 try:
    205                     iterable = iter(o)
    206                 except TypeError:
    207                     pass
    208                 else:
    209                     return list(iterable)
    210                 return JSONEncoder.default(self, o)
    211 
    212         """
    213         raise TypeError(repr(o) + " is not JSON serializable")
    214 
    215     def encode(self, o):
    216         """Return a JSON string representation of a Python data structure.
    217 
    218         >>> from simplejson import JSONEncoder
    219         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
    220         '{"foo": ["bar", "baz"]}'
    221 
    222         """
    223         # This is for extremely simple cases and benchmarks.
    224         if isinstance(o, basestring):
    225             if isinstance(o, str):
    226                 _encoding = self.encoding
    227                 if (_encoding is not None
    228                         and not (_encoding == 'utf-8')):
    229                     o = o.decode(_encoding)
    230             if self.ensure_ascii:
    231                 return encode_basestring_ascii(o)
    232             else:
    233                 return encode_basestring(o)
    234         # This doesn't pass the iterator directly to ''.join() because the
    235         # exceptions aren't as detailed.  The list call should be roughly
    236         # equivalent to the PySequence_Fast that ''.join() would do.
    237         chunks = self.iterencode(o, _one_shot=True)
    238         if not isinstance(chunks, (list, tuple)):
    239             chunks = list(chunks)
    240         if self.ensure_ascii:
    241             return ''.join(chunks)
    242         else:
    243             return u''.join(chunks)
    244 
    245     def iterencode(self, o, _one_shot=False):
    246         """Encode the given object and yield each string
    247         representation as available.
    248 
    249         For example::
    250 
    251             for chunk in JSONEncoder().iterencode(bigobject):
    252                 mysocket.write(chunk)
    253 
    254         """
    255         if self.check_circular:
    256             markers = {}
    257         else:
    258             markers = None
    259         if self.ensure_ascii:
    260             _encoder = encode_basestring_ascii
    261         else:
    262             _encoder = encode_basestring
    263         if self.encoding != 'utf-8':
    264             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
    265                 if isinstance(o, str):
    266                     o = o.decode(_encoding)
    267                 return _orig_encoder(o)
    268 
    269         def floatstr(o, allow_nan=self.allow_nan,
    270                 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
    271             # Check for specials. Note that this type of test is processor
    272             # and/or platform-specific, so do tests which don't depend on
    273             # the internals.
    274 
    275             if o != o:
    276                 text = 'NaN'
    277             elif o == _inf:
    278                 text = 'Infinity'
    279             elif o == _neginf:
    280                 text = '-Infinity'
    281             else:
    282                 return _repr(o)
    283 
    284             if not allow_nan:
    285                 raise ValueError(
    286                     "Out of range float values are not JSON compliant: " +
    287                     repr(o))
    288 
    289             return text
    290 
    291 
    292         key_memo = {}
    293         if (_one_shot and c_make_encoder is not None
    294                 and self.indent is None):
    295             _iterencode = c_make_encoder(
    296                 markers, self.default, _encoder, self.indent,
    297                 self.key_separator, self.item_separator, self.sort_keys,
    298                 self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
    299                 self.namedtuple_as_object, self.tuple_as_array,
    300                 self.bigint_as_string, self.item_sort_key,
    301                 Decimal)
    302         else:
    303             _iterencode = _make_iterencode(
    304                 markers, self.default, _encoder, self.indent, floatstr,
    305                 self.key_separator, self.item_separator, self.sort_keys,
    306                 self.skipkeys, _one_shot, self.use_decimal,
    307                 self.namedtuple_as_object, self.tuple_as_array,
    308                 self.bigint_as_string, self.item_sort_key,
    309                 Decimal=Decimal)
    310         try:
    311             return _iterencode(o, 0)
    312         finally:
    313             key_memo.clear()
    314 
    315 
    316 class JSONEncoderForHTML(JSONEncoder):
    317     """An encoder that produces JSON safe to embed in HTML.
    318 
    319     To embed JSON content in, say, a script tag on a web page, the
    320     characters &, < and > should be escaped. They cannot be escaped
    321     with the usual entities (e.g. &amp;) because they are not expanded
    322     within <script> tags.
    323     """
    324 
    325     def encode(self, o):
    326         # Override JSONEncoder.encode because it has hacks for
    327         # performance that make things more complicated.
    328         chunks = self.iterencode(o, True)
    329         if self.ensure_ascii:
    330             return ''.join(chunks)
    331         else:
    332             return u''.join(chunks)
    333 
    334     def iterencode(self, o, _one_shot=False):
    335         chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
    336         for chunk in chunks:
    337             chunk = chunk.replace('&', '\\u0026')
    338             chunk = chunk.replace('<', '\\u003c')
    339             chunk = chunk.replace('>', '\\u003e')
    340             yield chunk
    341 
    342 
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        _bigint_as_string, _item_sort_key,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build the pure-Python encoder and return its ``_iterencode`` generator.

    The returned callable takes ``(o, _current_indent_level)`` and yields the
    JSON text for ``o`` in chunks.

    Arguments as used by the closures below:

    - ``markers``: dict mapping ``id(obj)`` to containers currently being
      encoded, used to detect circular references; ``None`` disables the check.
    - ``_default``: called with objects that match none of the handled types;
      its return value is encoded in their place.
    - ``_encoder``: called with string values and with dict keys to produce
      the quoted JSON string.
    - ``_indent``: string repeated once per nesting level after a newline, or
      ``None`` for no newlines.
    - ``_floatstr``: called with ``float`` values and ``float`` keys.
    - ``_key_separator`` / ``_item_separator``: emitted between key and value
      and between items.
    - ``_sort_keys``: sort dict items by key (ignored when ``_item_sort_key``
      is set).
    - ``_skipkeys``: silently skip keys of unsupported type instead of raising
      ``TypeError``.
    - ``_one_shot``: accepted but not referenced anywhere in this function.
    - ``_use_decimal``: emit ``Decimal`` values as ``str(value)``.
    - ``_namedtuple_as_object``: encode objects with a callable ``_asdict``
      attribute as the dict it returns.
    - ``_tuple_as_array``: encode tuples like lists.
    - ``_bigint_as_string``: quote ints outside the open interval
      ``(-2**53, 2**53)``.
    - ``_item_sort_key``: ``None`` or a callable used as the ``key`` when
      sorting dict items.

    The remaining keyword parameters only rebind builtins as locals (see the
    HACK note in the signature).

    Raises ``TypeError`` if ``_item_sort_key`` is truthy but not callable.
    """
    if _item_sort_key and not callable(_item_sort_key):
        raise TypeError("item_sort_key must be None or callable")

    def _iterencode_list(lst, _current_indent_level):
        # Yield the JSON array for ``lst`` (a list, or a tuple when
        # _tuple_as_array is set) chunk by chunk.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` is the prefix emitted together with the next element: the
        # opening bracket (plus indentation) for the first element, the
        # separator for every later one.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            # True/False are tested by identity before the int branch, since
            # bool is a subclass of int.
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                # Plain number unless _bigint_as_string is set and the value
                # lies outside (-2**53, 2**53); then it is quoted.
                yield ((buf + str(value))
                       if (not _bigint_as_string or
                           (-1 << 53) < value < (1 << 53))
                           else (buf + '"' + str(value) + '"'))
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Nested container or unknown object: emit the prefix on its
                # own, then delegate to the matching generator.
                yield buf
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        # Reached only when the generator runs to completion; there is no
        # try/finally around the marker bookkeeping.
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the JSON object for ``dct`` chunk by chunk.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        # _item_sort_key takes precedence over _sort_keys; sorting happens on
        # the original (key, value) pairs, before keys are converted to text.
        if _item_sort_key:
            items = dct.items()
            items.sort(key=_item_sort_key)
        elif _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            # ``first`` is only cleared for keys that are actually emitted, so
            # skipped keys never produce a stray separator.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                # Same quoting rule for large ints as in _iterencode_list.
                yield (str(value)
                       if (not _bigint_as_string or
                           (-1 << 53) < value < (1 << 53))
                           else ('"' + str(value) + '"'))
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        # Reached only when the generator runs to completion.
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Entry point: yield the JSON text for an arbitrary object ``o``.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield (str(o)
                   if (not _bigint_as_string or
                       (-1 << 53) < o < (1 << 53))
                       else ('"' + str(o) + '"'))
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, list):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        else:
            _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
            if _asdict and callable(_asdict):
                for chunk in _iterencode_dict(_asdict(), _current_indent_level):
                    yield chunk
            elif (_tuple_as_array and isinstance(o, tuple)):
                for chunk in _iterencode_list(o, _current_indent_level):
                    yield chunk
            elif isinstance(o, dict):
                for chunk in _iterencode_dict(o, _current_indent_level):
                    yield chunk
            elif _use_decimal and isinstance(o, Decimal):
                yield str(o)
            else:
                # Unknown type: record it for circular-reference detection,
                # convert it with _default() and encode whatever comes back.
                if markers is not None:
                    markerid = id(o)
                    if markerid in markers:
                        raise ValueError("Circular reference detected")
                    markers[markerid] = o
                o = _default(o)
                for chunk in _iterencode(o, _current_indent_level):
                    yield chunk
                if markers is not None:
                    del markers[markerid]

    return _iterencode
    568