0001"""
0002Implementation of JSONEncoder
0003"""
0004import re
0005try:
0006 from simplejson import _speedups
0007except ImportError:
0008 _speedups = None
0009
# Characters that must be escaped inside a JSON string when non-ASCII
# output is allowed: control characters, backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped when the output has to be pure ASCII:
# backslash, double quote, forward slash, and anything outside ' '..'~'.
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')

# Map every escapable character to its JSON escape sequence.  Start with the
# generic \uXXXX form for all control characters, then overlay the short
# two-character escapes, which take priority.
ESCAPE_DCT = {}
for i in range(0x20):
    ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)
ESCAPE_DCT.update({
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
})
0023
0024
# Assumes that parsing an out-of-range literal yields positive infinity.
INFINITY = float('1e66666')
# Hook used to render ordinary (finite) floats; looked up at call time.
FLOAT_REPR = repr


def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Finite values are rendered with ``FLOAT_REPR``.  NaN and the two
    infinities become ``NaN``, ``Infinity`` and ``-Infinity`` when
    ``allow_nan`` is true; otherwise a ``ValueError`` is raised for them.
    """
    # NaN is the only value that compares unequal to itself, so ``o == o``
    # rules it out; the remaining two comparisons rule out the infinities.
    if o == o and o != INFINITY and o != -INFINITY:
        return FLOAT_REPR(o)

    if not allow_nan:
        raise ValueError("Out of range float values are not JSON compliant: %r"
            % (o,))

    if o != o:
        return 'NaN'
    if o == INFINITY:
        return 'Infinity'
    return '-Infinity'
0046
0047
def encode_basestring(s):
    """
    Return a JSON representation of a Python string

    Only the characters matched by ``ESCAPE`` are replaced (using the
    ``ESCAPE_DCT`` table); every other character is passed through as is.
    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
0055
def encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string

    Every character matched by ``ESCAPE_ASCII`` is replaced either by its
    short escape from ``ESCAPE_DCT`` or by a ``\\uXXXX`` escape.
    """
    def escape_char(found):
        char = found.group(0)
        short_form = ESCAPE_DCT.get(char)
        if short_form is not None:
            return short_form
        # No table entry (this includes '/', which ESCAPE_ASCII matches but
        # ESCAPE_DCT does not list): fall back to the numeric escape.
        code = ord(char)
        if code >= 0x10000:
            # Outside the 16-bit range: emit a surrogate pair.
            code -= 0x10000
            high = 0xd800 | ((code >> 10) & 0x3ff)
            low = 0xdc00 | (code & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
        return '\\u%04x' % (code,)
    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
0072
# Prefer the implementation from the optional _speedups module when it is
# available.  _need_utf8 records which implementation is active; the encoder
# uses it to decide whether 'utf-8' byte strings must be decoded first.
_need_utf8 = hasattr(_speedups, 'encode_basestring_ascii')
if _need_utf8:
    encode_basestring_ascii = _speedups.encode_basestring_ascii
0078
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation
    (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, unicode, int, long, float,
        bool or None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, strings are only escaped where JSON
        requires it and the output may be a unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent infinite recursion; a ValueError is raised when one is
        found.  Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # NOTE: the nesting depth is per-instance state that the container
        # encoders increment and decrement while they are being iterated.
        # An exception raised part-way through leaves it incremented, and two
        # interleaved indented encodings on one instance would share it.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current depth."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the chunks of the JSON array for the list or tuple ``lst``.

        ``markers`` maps the ids of the containers currently being encoded
        to the containers themselves, or is None when circular reference
        checking is disabled.  Raises ValueError if ``lst`` is already in
        ``markers``.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            # Done with this container; it may legitimately appear again.
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the chunks of the JSON object for the dict ``dct``.

        Keys are converted to strings: str/unicode keys are used as they
        are (byte strings are decoded where required), floats go through
        ``floatstr``, True/False/None become 'true'/'false'/'null' and
        int/long keys use ``str``.  Any other key raises TypeError unless
        ``self.skipkeys`` is true, in which case the item is dropped.

        ``markers`` has the same meaning as in ``_iterencode_list``.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            keys = dct.keys()
            keys.sort()
            items = [(k, dct[k]) for k in keys]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # Byte strings need no decoding here when the active
        # encode_basestring_ascii is the _speedups one and they are utf-8.
        _do_decode = (_encoding is not None
            and not (_need_utf8 and _encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # The bool checks must come before the int check: bool is a
            # subclass of int, so testing int first would turn True/False
            # into 'True'/'False' and never reach these branches.
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the chunks of the JSON encoding of ``o``.

        Dispatches on the type of ``o``; objects of unsupported types are
        converted with ``self.default`` and the result is encoded instead.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_need_utf8 and _encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            # Track the object itself so that a default() which keeps
            # returning containers that lead back to ``o`` is detected.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``self.default`` and encode the result."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is a shortcut for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8' and _need_utf8)):
                    o = o.decode(_encoding)
            # Honour ensure_ascii exactly as _iterencode does, so that a
            # top-level string is encoded like a nested one.
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        # The iterator is materialised into a list before joining so that
        # an exception raised while encoding surfaces from iterencode()
        # rather than from inside ''.join().
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
0375
0376__all__ = ['JSONEncoder']