Home | History | Annotate | Download | only in simplejson
      1 """
      2 Implementation of JSONEncoder
      3 """
      4 import re
      5 try:
      6     from simplejson import _speedups
      7 except ImportError:
      8     _speedups = None
      9 
     10 ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
     11 ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
     12 ESCAPE_DCT = {
     13     # escape all forward slashes to prevent </script> attack
     14     '/': '\\/',
     15     '\\': '\\\\',
     16     '"': '\\"',
     17     '\b': '\\b',
     18     '\f': '\\f',
     19     '\n': '\\n',
     20     '\r': '\\r',
     21     '\t': '\\t',
     22 }
     23 for i in range(0x20):
     24     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
     25 
     26 # assume this produces an infinity on all machines (probably not guaranteed)
     27 INFINITY = float('1e66666')
     28 
     29 def floatstr(o, allow_nan=True):
     30     # Check for specials.  Note that this type of test is processor- and/or
     31     # platform-specific, so do tests which don't depend on the internals.
     32 
     33     if o != o:
     34         text = 'NaN'
     35     elif o == INFINITY:
     36         text = 'Infinity'
     37     elif o == -INFINITY:
     38         text = '-Infinity'
     39     else:
     40         return repr(o)
     41 
     42     if not allow_nan:
     43         raise ValueError("Out of range float values are not JSON compliant: %r"
     44             % (o,))
     45 
     46     return text
     47 
     48 
     49 def encode_basestring(s):
     50     """
     51     Return a JSON representation of a Python string
     52     """
     53     def replace(match):
     54         return ESCAPE_DCT[match.group(0)]
     55     return '"' + ESCAPE.sub(replace, s) + '"'
     56 
     57 def encode_basestring_ascii(s):
     58     def replace(match):
     59         s = match.group(0)
     60         try:
     61             return ESCAPE_DCT[s]
     62         except KeyError:
     63             n = ord(s)
     64             if n < 0x10000:
     65                 return '\\u%04x' % (n,)
     66             else:
     67                 # surrogate pair
     68                 n -= 0x10000
     69                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
     70                 s2 = 0xdc00 | (n & 0x3ff)
     71                 return '\\u%04x\\u%04x' % (s1, s2)
     72     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
     73         
     74 try:
     75     encode_basestring_ascii = _speedups.encode_basestring_ascii
     76     _need_utf8 = True
     77 except AttributeError:
     78     _need_utf8 = False
     79 
     80 class JSONEncoder(object):
     81     """
     82     Extensible JSON <http://json.org> encoder for Python data structures.
     83 
     84     Supports the following objects and types by default:
     85     
     86     +-------------------+---------------+
     87     | Python            | JSON          |
     88     +===================+===============+
     89     | dict              | object        |
     90     +-------------------+---------------+
     91     | list, tuple       | array         |
     92     +-------------------+---------------+
     93     | str, unicode      | string        |
     94     +-------------------+---------------+
     95     | int, long, float  | number        |
     96     +-------------------+---------------+
     97     | True              | true          |
     98     +-------------------+---------------+
     99     | False             | false         |
    100     +-------------------+---------------+
    101     | None              | null          |
    102     +-------------------+---------------+
    103 
    104     To extend this to recognize other objects, subclass and implement a
    105     ``.default()`` method with another method that returns a serializable
    106     object for ``o`` if possible, otherwise it should call the superclass
    107     implementation (to raise ``TypeError``).
    108     """
    109     __all__ = ['__init__', 'default', 'encode', 'iterencode']
    110     item_separator = ', '
    111     key_separator = ': '
    112     def __init__(self, skipkeys=False, ensure_ascii=True,
    113             check_circular=True, allow_nan=True, sort_keys=False,
    114             indent=None, separators=None, encoding='utf-8'):
    115         """
    116         Constructor for JSONEncoder, with sensible defaults.
    117 
    118         If skipkeys is False, then it is a TypeError to attempt
    119         encoding of keys that are not str, int, long, float or None.  If
    120         skipkeys is True, such items are simply skipped.
    121 
    122         If ensure_ascii is True, the output is guaranteed to be str
    123         objects with all incoming unicode characters escaped.  If
    124         ensure_ascii is false, the output will be unicode object.
    125 
    126         If check_circular is True, then lists, dicts, and custom encoded
    127         objects will be checked for circular references during encoding to
    128         prevent an infinite recursion (which would cause an OverflowError).
    129         Otherwise, no such check takes place.
    130 
    131         If allow_nan is True, then NaN, Infinity, and -Infinity will be
    132         encoded as such.  This behavior is not JSON specification compliant,
    133         but is consistent with most JavaScript based encoders and decoders.
    134         Otherwise, it will be a ValueError to encode such floats.
    135 
    136         If sort_keys is True, then the output of dictionaries will be
    137         sorted by key; this is useful for regression tests to ensure
    138         that JSON serializations can be compared on a day-to-day basis.
    139 
    140         If indent is a non-negative integer, then JSON array
    141         elements and object members will be pretty-printed with that
    142         indent level.  An indent level of 0 will only insert newlines.
    143         None is the most compact representation.
    144 
    145         If specified, separators should be a (item_separator, key_separator)
    146         tuple. The default is (', ', ': '). To get the most compact JSON
    147         representation you should specify (',', ':') to eliminate whitespace.
    148 
    149         If encoding is not None, then all input strings will be
    150         transformed into unicode using that encoding prior to JSON-encoding. 
    151         The default is UTF-8.
    152         """
    153 
    154         self.skipkeys = skipkeys
    155         self.ensure_ascii = ensure_ascii
    156         self.check_circular = check_circular
    157         self.allow_nan = allow_nan
    158         self.sort_keys = sort_keys
    159         self.indent = indent
    160         self.current_indent_level = 0
    161         if separators is not None:
    162             self.item_separator, self.key_separator = separators
    163         self.encoding = encoding
    164 
    165     def _newline_indent(self):
    166         return '\n' + (' ' * (self.indent * self.current_indent_level))
    167 
    168     def _iterencode_list(self, lst, markers=None):
    169         if not lst:
    170             yield '[]'
    171             return
    172         if markers is not None:
    173             markerid = id(lst)
    174             if markerid in markers:
    175                 raise ValueError("Circular reference detected")
    176             markers[markerid] = lst
    177         yield '['
    178         if self.indent is not None:
    179             self.current_indent_level += 1
    180             newline_indent = self._newline_indent()
    181             separator = self.item_separator + newline_indent
    182             yield newline_indent
    183         else:
    184             newline_indent = None
    185             separator = self.item_separator
    186         first = True
    187         for value in lst:
    188             if first:
    189                 first = False
    190             else:
    191                 yield separator
    192             for chunk in self._iterencode(value, markers):
    193                 yield chunk
    194         if newline_indent is not None:
    195             self.current_indent_level -= 1
    196             yield self._newline_indent()
    197         yield ']'
    198         if markers is not None:
    199             del markers[markerid]
    200 
    201     def _iterencode_dict(self, dct, markers=None):
    202         if not dct:
    203             yield '{}'
    204             return
    205         if markers is not None:
    206             markerid = id(dct)
    207             if markerid in markers:
    208                 raise ValueError("Circular reference detected")
    209             markers[markerid] = dct
    210         yield '{'
    211         key_separator = self.key_separator
    212         if self.indent is not None:
    213             self.current_indent_level += 1
    214             newline_indent = self._newline_indent()
    215             item_separator = self.item_separator + newline_indent
    216             yield newline_indent
    217         else:
    218             newline_indent = None
    219             item_separator = self.item_separator
    220         first = True
    221         if self.ensure_ascii:
    222             encoder = encode_basestring_ascii
    223         else:
    224             encoder = encode_basestring
    225         allow_nan = self.allow_nan
    226         if self.sort_keys:
    227             keys = dct.keys()
    228             keys.sort()
    229             items = [(k, dct[k]) for k in keys]
    230         else:
    231             items = dct.iteritems()
    232         _encoding = self.encoding
    233         _do_decode = (_encoding is not None
    234             and not (_need_utf8 and _encoding == 'utf-8'))
    235         for key, value in items:
    236             if isinstance(key, str):
    237                 if _do_decode:
    238                     key = key.decode(_encoding)
    239             elif isinstance(key, basestring):
    240                 pass
    241             # JavaScript is weakly typed for these, so it makes sense to
    242             # also allow them.  Many encoders seem to do something like this.
    243             elif isinstance(key, float):
    244                 key = floatstr(key, allow_nan)
    245             elif isinstance(key, (int, long)):
    246                 key = str(key)
    247             elif key is True:
    248                 key = 'true'
    249             elif key is False:
    250                 key = 'false'
    251             elif key is None:
    252                 key = 'null'
    253             elif self.skipkeys:
    254                 continue
    255             else:
    256                 raise TypeError("key %r is not a string" % (key,))
    257             if first:
    258                 first = False
    259             else:
    260                 yield item_separator
    261             yield encoder(key)
    262             yield key_separator
    263             for chunk in self._iterencode(value, markers):
    264                 yield chunk
    265         if newline_indent is not None:
    266             self.current_indent_level -= 1
    267             yield self._newline_indent()
    268         yield '}'
    269         if markers is not None:
    270             del markers[markerid]
    271 
    272     def _iterencode(self, o, markers=None):
    273         if isinstance(o, basestring):
    274             if self.ensure_ascii:
    275                 encoder = encode_basestring_ascii
    276             else:
    277                 encoder = encode_basestring
    278             _encoding = self.encoding
    279             if (_encoding is not None and isinstance(o, str)
    280                     and not (_need_utf8 and _encoding == 'utf-8')):
    281                 o = o.decode(_encoding)
    282             yield encoder(o)
    283         elif o is None:
    284             yield 'null'
    285         elif o is True:
    286             yield 'true'
    287         elif o is False:
    288             yield 'false'
    289         elif isinstance(o, (int, long)):
    290             yield str(o)
    291         elif isinstance(o, float):
    292             yield floatstr(o, self.allow_nan)
    293         elif isinstance(o, (list, tuple)):
    294             for chunk in self._iterencode_list(o, markers):
    295                 yield chunk
    296         elif isinstance(o, dict):
    297             for chunk in self._iterencode_dict(o, markers):
    298                 yield chunk
    299         else:
    300             if markers is not None:
    301                 markerid = id(o)
    302                 if markerid in markers:
    303                     raise ValueError("Circular reference detected")
    304                 markers[markerid] = o
    305             for chunk in self._iterencode_default(o, markers):
    306                 yield chunk
    307             if markers is not None:
    308                 del markers[markerid]
    309 
    310     def _iterencode_default(self, o, markers=None):
    311         newobj = self.default(o)
    312         return self._iterencode(newobj, markers)
    313 
    314     def default(self, o):
    315         """
    316         Implement this method in a subclass such that it returns
    317         a serializable object for ``o``, or calls the base implementation
    318         (to raise a ``TypeError``).
    319 
    320         For example, to support arbitrary iterators, you could
    321         implement default like this::
    322             
    323             def default(self, o):
    324                 try:
    325                     iterable = iter(o)
    326                 except TypeError:
    327                     pass
    328                 else:
    329                     return list(iterable)
    330                 return JSONEncoder.default(self, o)
    331         """
    332         raise TypeError("%r is not JSON serializable" % (o,))
    333 
    334     def encode(self, o):
    335         """
    336         Return a JSON string representation of a Python data structure.
    337 
    338         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
    339         '{"foo":["bar", "baz"]}'
    340         """
    341         # This is for extremely simple cases and benchmarks...
    342         if isinstance(o, basestring):
    343             if isinstance(o, str):
    344                 _encoding = self.encoding
    345                 if (_encoding is not None 
    346                         and not (_encoding == 'utf-8' and _need_utf8)):
    347                     o = o.decode(_encoding)
    348             return encode_basestring_ascii(o)
    349         # This doesn't pass the iterator directly to ''.join() because it
    350         # sucks at reporting exceptions.  It's going to do this internally
    351         # anyway because it uses PySequence_Fast or similar.
    352         chunks = list(self.iterencode(o))
    353         return ''.join(chunks)
    354 
    355     def iterencode(self, o):
    356         """
    357         Encode the given object and yield each string
    358         representation as available.
    359         
    360         For example::
    361             
    362             for chunk in JSONEncoder().iterencode(bigobject):
    363                 mysocket.write(chunk)
    364         """
    365         if self.check_circular:
    366             markers = {}
    367         else:
    368             markers = None
    369         return self._iterencode(o, markers)
    370 
    371 __all__ = ['JSONEncoder']
    372