1 """Implementation of JSONEncoder 2 """ 3 import re 4 from decimal import Decimal 5 6 def _import_speedups(): 7 try: 8 from simplejson import _speedups 9 return _speedups.encode_basestring_ascii, _speedups.make_encoder 10 except ImportError: 11 return None, None 12 c_encode_basestring_ascii, c_make_encoder = _import_speedups() 13 14 from simplejson.decoder import PosInf 15 16 ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') 17 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') 18 HAS_UTF8 = re.compile(r'[\x80-\xff]') 19 ESCAPE_DCT = { 20 '\\': '\\\\', 21 '"': '\\"', 22 '\b': '\\b', 23 '\f': '\\f', 24 '\n': '\\n', 25 '\r': '\\r', 26 '\t': '\\t', 27 u'\u2028': '\\u2028', 28 u'\u2029': '\\u2029', 29 } 30 for i in range(0x20): 31 #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) 32 ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) 33 34 FLOAT_REPR = repr 35 36 def encode_basestring(s): 37 """Return a JSON representation of a Python string 38 39 """ 40 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 41 s = s.decode('utf-8') 42 def replace(match): 43 return ESCAPE_DCT[match.group(0)] 44 return u'"' + ESCAPE.sub(replace, s) + u'"' 45 46 47 def py_encode_basestring_ascii(s): 48 """Return an ASCII-only JSON representation of a Python string 49 50 """ 51 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 52 s = s.decode('utf-8') 53 def replace(match): 54 s = match.group(0) 55 try: 56 return ESCAPE_DCT[s] 57 except KeyError: 58 n = ord(s) 59 if n < 0x10000: 60 #return '\\u{0:04x}'.format(n) 61 return '\\u%04x' % (n,) 62 else: 63 # surrogate pair 64 n -= 0x10000 65 s1 = 0xd800 | ((n >> 10) & 0x3ff) 66 s2 = 0xdc00 | (n & 0x3ff) 67 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) 68 return '\\u%04x\\u%04x' % (s1, s2) 69 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' 70 71 72 encode_basestring_ascii = ( 73 c_encode_basestring_ascii or py_encode_basestring_ascii) 74 75 class JSONEncoder(object): 76 """Extensible JSON <http://json.org> encoder for Python data structures. 77 78 Supports the following objects and types by default: 79 80 +-------------------+---------------+ 81 | Python | JSON | 82 +===================+===============+ 83 | dict, namedtuple | object | 84 +-------------------+---------------+ 85 | list, tuple | array | 86 +-------------------+---------------+ 87 | str, unicode | string | 88 +-------------------+---------------+ 89 | int, long, float | number | 90 +-------------------+---------------+ 91 | True | true | 92 +-------------------+---------------+ 93 | False | false | 94 +-------------------+---------------+ 95 | None | null | 96 +-------------------+---------------+ 97 98 To extend this to recognize other objects, subclass and implement a 99 ``.default()`` method with another method that returns a serializable 100 object for ``o`` if possible, otherwise it should call the superclass 101 implementation (to raise ``TypeError``). 102 103 """ 104 item_separator = ', ' 105 key_separator = ': ' 106 def __init__(self, skipkeys=False, ensure_ascii=True, 107 check_circular=True, allow_nan=True, sort_keys=False, 108 indent=None, separators=None, encoding='utf-8', default=None, 109 use_decimal=True, namedtuple_as_object=True, 110 tuple_as_array=True, bigint_as_string=False, 111 item_sort_key=None): 112 """Constructor for JSONEncoder, with sensible defaults. 113 114 If skipkeys is false, then it is a TypeError to attempt 115 encoding of keys that are not str, int, long, float or None. If 116 skipkeys is True, such items are simply skipped. 117 118 If ensure_ascii is true, the output is guaranteed to be str 119 objects with all incoming unicode characters escaped. If 120 ensure_ascii is false, the output will be unicode object. 121 122 If check_circular is true, then lists, dicts, and custom encoded 123 objects will be checked for circular references during encoding to 124 prevent an infinite recursion (which would cause an OverflowError). 125 Otherwise, no such check takes place. 126 127 If allow_nan is true, then NaN, Infinity, and -Infinity will be 128 encoded as such. This behavior is not JSON specification compliant, 129 but is consistent with most JavaScript based encoders and decoders. 130 Otherwise, it will be a ValueError to encode such floats. 131 132 If sort_keys is true, then the output of dictionaries will be 133 sorted by key; this is useful for regression tests to ensure 134 that JSON serializations can be compared on a day-to-day basis. 135 136 If indent is a string, then JSON array elements and object members 137 will be pretty-printed with a newline followed by that string repeated 138 for each level of nesting. ``None`` (the default) selects the most compact 139 representation without any newlines. For backwards compatibility with 140 versions of simplejson earlier than 2.1.0, an integer is also accepted 141 and is converted to a string with that many spaces. 142 143 If specified, separators should be a (item_separator, key_separator) 144 tuple. The default is (', ', ': '). To get the most compact JSON 145 representation you should specify (',', ':') to eliminate whitespace. 146 147 If specified, default is a function that gets called for objects 148 that can't otherwise be serialized. It should return a JSON encodable 149 version of the object or raise a ``TypeError``. 150 151 If encoding is not None, then all input strings will be 152 transformed into unicode using that encoding prior to JSON-encoding. 153 The default is UTF-8. 154 155 If use_decimal is true (not the default), ``decimal.Decimal`` will 156 be supported directly by the encoder. For the inverse, decode JSON 157 with ``parse_float=decimal.Decimal``. 158 159 If namedtuple_as_object is true (the default), objects with 160 ``_asdict()`` methods will be encoded as JSON objects. 161 162 If tuple_as_array is true (the default), tuple (and subclasses) will 163 be encoded as JSON arrays. 164 165 If bigint_as_string is true (not the default), ints 2**53 and higher 166 or lower than -2**53 will be encoded as strings. This is to avoid the 167 rounding that happens in Javascript otherwise. 168 169 If specified, item_sort_key is a callable used to sort the items in 170 each dictionary. This is useful if you want to sort items other than 171 in alphabetical order by key. 172 """ 173 174 self.skipkeys = skipkeys 175 self.ensure_ascii = ensure_ascii 176 self.check_circular = check_circular 177 self.allow_nan = allow_nan 178 self.sort_keys = sort_keys 179 self.use_decimal = use_decimal 180 self.namedtuple_as_object = namedtuple_as_object 181 self.tuple_as_array = tuple_as_array 182 self.bigint_as_string = bigint_as_string 183 self.item_sort_key = item_sort_key 184 if indent is not None and not isinstance(indent, basestring): 185 indent = indent * ' ' 186 self.indent = indent 187 if separators is not None: 188 self.item_separator, self.key_separator = separators 189 elif indent is not None: 190 self.item_separator = ',' 191 if default is not None: 192 self.default = default 193 self.encoding = encoding 194 195 def default(self, o): 196 """Implement this method in a subclass such that it returns 197 a serializable object for ``o``, or calls the base implementation 198 (to raise a ``TypeError``). 199 200 For example, to support arbitrary iterators, you could 201 implement default like this:: 202 203 def default(self, o): 204 try: 205 iterable = iter(o) 206 except TypeError: 207 pass 208 else: 209 return list(iterable) 210 return JSONEncoder.default(self, o) 211 212 """ 213 raise TypeError(repr(o) + " is not JSON serializable") 214 215 def encode(self, o): 216 """Return a JSON string representation of a Python data structure. 217 218 >>> from simplejson import JSONEncoder 219 >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) 220 '{"foo": ["bar", "baz"]}' 221 222 """ 223 # This is for extremely simple cases and benchmarks. 224 if isinstance(o, basestring): 225 if isinstance(o, str): 226 _encoding = self.encoding 227 if (_encoding is not None 228 and not (_encoding == 'utf-8')): 229 o = o.decode(_encoding) 230 if self.ensure_ascii: 231 return encode_basestring_ascii(o) 232 else: 233 return encode_basestring(o) 234 # This doesn't pass the iterator directly to ''.join() because the 235 # exceptions aren't as detailed. The list call should be roughly 236 # equivalent to the PySequence_Fast that ''.join() would do. 237 chunks = self.iterencode(o, _one_shot=True) 238 if not isinstance(chunks, (list, tuple)): 239 chunks = list(chunks) 240 if self.ensure_ascii: 241 return ''.join(chunks) 242 else: 243 return u''.join(chunks) 244 245 def iterencode(self, o, _one_shot=False): 246 """Encode the given object and yield each string 247 representation as available. 248 249 For example:: 250 251 for chunk in JSONEncoder().iterencode(bigobject): 252 mysocket.write(chunk) 253 254 """ 255 if self.check_circular: 256 markers = {} 257 else: 258 markers = None 259 if self.ensure_ascii: 260 _encoder = encode_basestring_ascii 261 else: 262 _encoder = encode_basestring 263 if self.encoding != 'utf-8': 264 def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): 265 if isinstance(o, str): 266 o = o.decode(_encoding) 267 return _orig_encoder(o) 268 269 def floatstr(o, allow_nan=self.allow_nan, 270 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): 271 # Check for specials. Note that this type of test is processor 272 # and/or platform-specific, so do tests which don't depend on 273 # the internals. 274 275 if o != o: 276 text = 'NaN' 277 elif o == _inf: 278 text = 'Infinity' 279 elif o == _neginf: 280 text = '-Infinity' 281 else: 282 return _repr(o) 283 284 if not allow_nan: 285 raise ValueError( 286 "Out of range float values are not JSON compliant: " + 287 repr(o)) 288 289 return text 290 291 292 key_memo = {} 293 if (_one_shot and c_make_encoder is not None 294 and self.indent is None): 295 _iterencode = c_make_encoder( 296 markers, self.default, _encoder, self.indent, 297 self.key_separator, self.item_separator, self.sort_keys, 298 self.skipkeys, self.allow_nan, key_memo, self.use_decimal, 299 self.namedtuple_as_object, self.tuple_as_array, 300 self.bigint_as_string, self.item_sort_key, 301 Decimal) 302 else: 303 _iterencode = _make_iterencode( 304 markers, self.default, _encoder, self.indent, floatstr, 305 self.key_separator, self.item_separator, self.sort_keys, 306 self.skipkeys, _one_shot, self.use_decimal, 307 self.namedtuple_as_object, self.tuple_as_array, 308 self.bigint_as_string, self.item_sort_key, 309 Decimal=Decimal) 310 try: 311 return _iterencode(o, 0) 312 finally: 313 key_memo.clear() 314 315 316 class JSONEncoderForHTML(JSONEncoder): 317 """An encoder that produces JSON safe to embed in HTML. 318 319 To embed JSON content in, say, a script tag on a web page, the 320 characters &, < and > should be escaped. They cannot be escaped 321 with the usual entities (e.g. &) because they are not expanded 322 within <script> tags. 323 """ 324 325 def encode(self, o): 326 # Override JSONEncoder.encode because it has hacks for 327 # performance that make things more complicated. 328 chunks = self.iterencode(o, True) 329 if self.ensure_ascii: 330 return ''.join(chunks) 331 else: 332 return u''.join(chunks) 333 334 def iterencode(self, o, _one_shot=False): 335 chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) 336 for chunk in chunks: 337 chunk = chunk.replace('&', '\\u0026') 338 chunk = chunk.replace('<', '\\u003c') 339 chunk = chunk.replace('>', '\\u003e') 340 yield chunk 341 342 343 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, 344 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, 345 _use_decimal, _namedtuple_as_object, _tuple_as_array, 346 _bigint_as_string, _item_sort_key, 347 ## HACK: hand-optimized bytecode; turn globals into locals 348 False=False, 349 True=True, 350 ValueError=ValueError, 351 basestring=basestring, 352 Decimal=Decimal, 353 dict=dict, 354 float=float, 355 id=id, 356 int=int, 357 isinstance=isinstance, 358 list=list, 359 long=long, 360 str=str, 361 tuple=tuple, 362 ): 363 if _item_sort_key and not callable(_item_sort_key): 364 raise TypeError("item_sort_key must be None or callable") 365 366 def _iterencode_list(lst, _current_indent_level): 367 if not lst: 368 yield '[]' 369 return 370 if markers is not None: 371 markerid = id(lst) 372 if markerid in markers: 373 raise ValueError("Circular reference detected") 374 markers[markerid] = lst 375 buf = '[' 376 if _indent is not None: 377 _current_indent_level += 1 378 newline_indent = '\n' + (_indent * _current_indent_level) 379 separator = _item_separator + newline_indent 380 buf += newline_indent 381 else: 382 newline_indent = None 383 separator = _item_separator 384 first = True 385 for value in lst: 386 if first: 387 first = False 388 else: 389 buf = separator 390 if isinstance(value, basestring): 391 yield buf + _encoder(value) 392 elif value is None: 393 yield buf + 'null' 394 elif value is True: 395 yield buf + 'true' 396 elif value is False: 397 yield buf + 'false' 398 elif isinstance(value, (int, long)): 399 yield ((buf + str(value)) 400 if (not _bigint_as_string or 401 (-1 << 53) < value < (1 << 53)) 402 else (buf + '"' + str(value) + '"')) 403 elif isinstance(value, float): 404 yield buf + _floatstr(value) 405 elif _use_decimal and isinstance(value, Decimal): 406 yield buf + str(value) 407 else: 408 yield buf 409 if isinstance(value, list): 410 chunks = _iterencode_list(value, _current_indent_level) 411 else: 412 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) 413 if _asdict and callable(_asdict): 414 chunks = _iterencode_dict(_asdict(), 415 _current_indent_level) 416 elif _tuple_as_array and isinstance(value, tuple): 417 chunks = _iterencode_list(value, _current_indent_level) 418 elif isinstance(value, dict): 419 chunks = _iterencode_dict(value, _current_indent_level) 420 else: 421 chunks = _iterencode(value, _current_indent_level) 422 for chunk in chunks: 423 yield chunk 424 if newline_indent is not None: 425 _current_indent_level -= 1 426 yield '\n' + (_indent * _current_indent_level) 427 yield ']' 428 if markers is not None: 429 del markers[markerid] 430 431 def _iterencode_dict(dct, _current_indent_level): 432 if not dct: 433 yield '{}' 434 return 435 if markers is not None: 436 markerid = id(dct) 437 if markerid in markers: 438 raise ValueError("Circular reference detected") 439 markers[markerid] = dct 440 yield '{' 441 if _indent is not None: 442 _current_indent_level += 1 443 newline_indent = '\n' + (_indent * _current_indent_level) 444 item_separator = _item_separator + newline_indent 445 yield newline_indent 446 else: 447 newline_indent = None 448 item_separator = _item_separator 449 first = True 450 if _item_sort_key: 451 items = dct.items() 452 items.sort(key=_item_sort_key) 453 elif _sort_keys: 454 items = dct.items() 455 items.sort(key=lambda kv: kv[0]) 456 else: 457 items = dct.iteritems() 458 for key, value in items: 459 if isinstance(key, basestring): 460 pass 461 # JavaScript is weakly typed for these, so it makes sense to 462 # also allow them. Many encoders seem to do something like this. 463 elif isinstance(key, float): 464 key = _floatstr(key) 465 elif key is True: 466 key = 'true' 467 elif key is False: 468 key = 'false' 469 elif key is None: 470 key = 'null' 471 elif isinstance(key, (int, long)): 472 key = str(key) 473 elif _skipkeys: 474 continue 475 else: 476 raise TypeError("key " + repr(key) + " is not a string") 477 if first: 478 first = False 479 else: 480 yield item_separator 481 yield _encoder(key) 482 yield _key_separator 483 if isinstance(value, basestring): 484 yield _encoder(value) 485 elif value is None: 486 yield 'null' 487 elif value is True: 488 yield 'true' 489 elif value is False: 490 yield 'false' 491 elif isinstance(value, (int, long)): 492 yield (str(value) 493 if (not _bigint_as_string or 494 (-1 << 53) < value < (1 << 53)) 495 else ('"' + str(value) + '"')) 496 elif isinstance(value, float): 497 yield _floatstr(value) 498 elif _use_decimal and isinstance(value, Decimal): 499 yield str(value) 500 else: 501 if isinstance(value, list): 502 chunks = _iterencode_list(value, _current_indent_level) 503 else: 504 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) 505 if _asdict and callable(_asdict): 506 chunks = _iterencode_dict(_asdict(), 507 _current_indent_level) 508 elif _tuple_as_array and isinstance(value, tuple): 509 chunks = _iterencode_list(value, _current_indent_level) 510 elif isinstance(value, dict): 511 chunks = _iterencode_dict(value, _current_indent_level) 512 else: 513 chunks = _iterencode(value, _current_indent_level) 514 for chunk in chunks: 515 yield chunk 516 if newline_indent is not None: 517 _current_indent_level -= 1 518 yield '\n' + (_indent * _current_indent_level) 519 yield '}' 520 if markers is not None: 521 del markers[markerid] 522 523 def _iterencode(o, _current_indent_level): 524 if isinstance(o, basestring): 525 yield _encoder(o) 526 elif o is None: 527 yield 'null' 528 elif o is True: 529 yield 'true' 530 elif o is False: 531 yield 'false' 532 elif isinstance(o, (int, long)): 533 yield (str(o) 534 if (not _bigint_as_string or 535 (-1 << 53) < o < (1 << 53)) 536 else ('"' + str(o) + '"')) 537 elif isinstance(o, float): 538 yield _floatstr(o) 539 elif isinstance(o, list): 540 for chunk in _iterencode_list(o, _current_indent_level): 541 yield chunk 542 else: 543 _asdict = _namedtuple_as_object and getattr(o, '_asdict', None) 544 if _asdict and callable(_asdict): 545 for chunk in _iterencode_dict(_asdict(), _current_indent_level): 546 yield chunk 547 elif (_tuple_as_array and isinstance(o, tuple)): 548 for chunk in _iterencode_list(o, _current_indent_level): 549 yield chunk 550 elif isinstance(o, dict): 551 for chunk in _iterencode_dict(o, _current_indent_level): 552 yield chunk 553 elif _use_decimal and isinstance(o, Decimal): 554 yield str(o) 555 else: 556 if markers is not None: 557 markerid = id(o) 558 if markerid in markers: 559 raise ValueError("Circular reference detected") 560 markers[markerid] = o 561 o = _default(o) 562 for chunk in _iterencode(o, _current_indent_level): 563 yield chunk 564 if markers is not None: 565 del markers[markerid] 566 567 return _iterencode 568