Home | History | Annotate | Download | only in json
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package json
      6 
      7 import (
      8 	"bytes"
      9 	"errors"
     10 	"io"
     11 )
     12 
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	// r is the underlying input; refill reads from it.
	r io.Reader
	// buf holds bytes read from r; refill slides consumed data out and
	// appends newly read data.
	buf []byte
	// d is the decode state that unmarshals one complete value at a time.
	d     decodeState
	scanp int // start of unread data in buf
	// scan is the scanner readValue drives to find where a value ends.
	scan scanner
	// err is a sticky error recorded by readValue; once set, Decode
	// returns it immediately.
	err error

	// tokenState is the current state of the Token state machine
	// (one of the token* constants).
	tokenState int
	// tokenStack holds the tokenState saved at each enclosing '[' or '{'
	// so it can be restored at the matching closing delimiter.
	tokenStack []int
}
     25 
     26 // NewDecoder returns a new decoder that reads from r.
     27 //
     28 // The decoder introduces its own buffering and may
     29 // read data from r beyond the JSON values requested.
     30 func NewDecoder(r io.Reader) *Decoder {
     31 	return &Decoder{r: r}
     32 }
     33 
     34 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
     35 // Number instead of as a float64.
     36 func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
     37 
     38 // Decode reads the next JSON-encoded value from its
     39 // input and stores it in the value pointed to by v.
     40 //
     41 // See the documentation for Unmarshal for details about
     42 // the conversion of JSON into a Go value.
     43 func (dec *Decoder) Decode(v interface{}) error {
     44 	if dec.err != nil {
     45 		return dec.err
     46 	}
     47 
     48 	if err := dec.tokenPrepareForDecode(); err != nil {
     49 		return err
     50 	}
     51 
     52 	if !dec.tokenValueAllowed() {
     53 		return &SyntaxError{msg: "not at beginning of value"}
     54 	}
     55 
     56 	// Read whole value into buffer.
     57 	n, err := dec.readValue()
     58 	if err != nil {
     59 		return err
     60 	}
     61 	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
     62 	dec.scanp += n
     63 
     64 	// Don't save err from unmarshal into dec.err:
     65 	// the connection is still usable since we read a complete JSON
     66 	// object from it before the error happened.
     67 	err = dec.d.unmarshal(v)
     68 
     69 	// fixup token streaming state
     70 	dec.tokenValueEnd()
     71 
     72 	return err
     73 }
     74 
     75 // Buffered returns a reader of the data remaining in the Decoder's
     76 // buffer. The reader is valid until the next call to Decode.
     77 func (dec *Decoder) Buffered() io.Reader {
     78 	return bytes.NewReader(dec.buf[dec.scanp:])
     79 }
     80 
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The returned length is measured from dec.scanp; dec.scanp itself is
// not advanced here. Scanner errors and read errors are stored in
// dec.err before being returned.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor into dec.buf: bytes before it have already
	// been fed to the scanner.
	scanp := dec.scanp
	// err holds the result of the most recent refill; it is examined only
	// after the data that refill delivered has been scanned.
	var err error
Input:
	for {
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, c)
			if v == scanEnd {
				// The value ends before byte i (see the note on the
				// one-byte delay below), so c is not counted.
				scanp += i
				break Input
			}
			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				// The closing delimiter at index i belongs to the value.
				scanp += i + 1
				break Input
			}
			if v == scanError {
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// Everything currently buffered has been scanned.
		scanp = len(dec.buf)

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed a synthetic space so a value that ends exactly at
				// end of input is still recognized as complete.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Input ended with non-space data buffered but no complete
				// value: report it as an unexpected EOF rather than a clean one.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer down and reset dec.scanp, so remember
		// the scanned distance relative to dec.scanp and re-derive the
		// cursor afterwards.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
    133 
    134 func (dec *Decoder) refill() error {
    135 	// Make room to read more into the buffer.
    136 	// First slide down data already consumed.
    137 	if dec.scanp > 0 {
    138 		n := copy(dec.buf, dec.buf[dec.scanp:])
    139 		dec.buf = dec.buf[:n]
    140 		dec.scanp = 0
    141 	}
    142 
    143 	// Grow buffer if not large enough.
    144 	const minRead = 512
    145 	if cap(dec.buf)-len(dec.buf) < minRead {
    146 		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
    147 		copy(newBuf, dec.buf)
    148 		dec.buf = newBuf
    149 	}
    150 
    151 	// Read. Delay error for next iteration (after scan).
    152 	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
    153 	dec.buf = dec.buf[0 : len(dec.buf)+n]
    154 
    155 	return err
    156 }
    157 
    158 func nonSpace(b []byte) bool {
    159 	for _, c := range b {
    160 		if !isSpace(c) {
    161 			return true
    162 		}
    163 	}
    164 	return false
    165 }
    166 
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	// w is the destination Encode writes to.
	w io.Writer
	// err is the sticky error from a failed write to w; once set,
	// Encode returns it without doing any work.
	err error
	// escapeHTML is passed to the marshaler as its escapeHTML option.
	escapeHTML bool

	// indentBuf is a scratch buffer, allocated on first use, that
	// receives the indented form of each encoded value.
	indentBuf *bytes.Buffer
	// indentPrefix and indentValue are the prefix and indent arguments
	// passed to Indent; when both are empty no indentation is applied.
	indentPrefix string
	indentValue  string
}
    177 
    178 // NewEncoder returns a new encoder that writes to w.
    179 func NewEncoder(w io.Writer) *Encoder {
    180 	return &Encoder{w: w, escapeHTML: true}
    181 }
    182 
    183 // Encode writes the JSON encoding of v to the stream,
    184 // followed by a newline character.
    185 //
    186 // See the documentation for Marshal for details about the
    187 // conversion of Go values to JSON.
    188 func (enc *Encoder) Encode(v interface{}) error {
    189 	if enc.err != nil {
    190 		return enc.err
    191 	}
    192 	e := newEncodeState()
    193 	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
    194 	if err != nil {
    195 		return err
    196 	}
    197 
    198 	// Terminate each value with a newline.
    199 	// This makes the output look a little nicer
    200 	// when debugging, and some kind of space
    201 	// is required if the encoded value was a number,
    202 	// so that the reader knows there aren't more
    203 	// digits coming.
    204 	e.WriteByte('\n')
    205 
    206 	b := e.Bytes()
    207 	if enc.indentPrefix != "" || enc.indentValue != "" {
    208 		if enc.indentBuf == nil {
    209 			enc.indentBuf = new(bytes.Buffer)
    210 		}
    211 		enc.indentBuf.Reset()
    212 		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
    213 		if err != nil {
    214 			return err
    215 		}
    216 		b = enc.indentBuf.Bytes()
    217 	}
    218 	if _, err = enc.w.Write(b); err != nil {
    219 		enc.err = err
    220 	}
    221 	encodeStatePool.Put(e)
    222 	return err
    223 }
    224 
    225 // SetIndent instructs the encoder to format each subsequent encoded
    226 // value as if indented by the package-level function Indent(dst, src, prefix, indent).
    227 // Calling SetIndent("", "") disables indentation.
    228 func (enc *Encoder) SetIndent(prefix, indent string) {
    229 	enc.indentPrefix = prefix
    230 	enc.indentValue = indent
    231 }
    232 
    233 // SetEscapeHTML specifies whether problematic HTML characters
    234 // should be escaped inside JSON quoted strings.
    235 // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
    236 // to avoid certain safety problems that can arise when embedding JSON in HTML.
    237 //
    238 // In non-HTML settings where the escaping interferes with the readability
    239 // of the output, SetEscapeHTML(false) disables this behavior.
    240 func (enc *Encoder) SetEscapeHTML(on bool) {
    241 	enc.escapeHTML = on
    242 }
    243 
    244 // RawMessage is a raw encoded JSON value.
    245 // It implements Marshaler and Unmarshaler and can
    246 // be used to delay JSON decoding or precompute a JSON encoding.
    247 type RawMessage []byte
    248 
    249 // MarshalJSON returns m as the JSON encoding of m.
    250 func (m RawMessage) MarshalJSON() ([]byte, error) {
    251 	if m == nil {
    252 		return []byte("null"), nil
    253 	}
    254 	return m, nil
    255 }
    256 
    257 // UnmarshalJSON sets *m to a copy of data.
    258 func (m *RawMessage) UnmarshalJSON(data []byte) error {
    259 	if m == nil {
    260 		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
    261 	}
    262 	*m = append((*m)[0:0], data...)
    263 	return nil
    264 }
    265 
// Compile-time assertions that *RawMessage satisfies both the Marshaler
// and the Unmarshaler interfaces.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
    268 
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers
//	string, for JSON string literals
//	nil, for JSON null
//
// Numbers are reported as Number instead of float64 when UseNumber
// has been called on the Decoder.
type Token interface{}
    279 
// States of the Decoder's token state machine (Decoder.tokenState).
// Each state describes what has just been consumed and therefore what
// may legally come next.
const (
	tokenTopValue    = iota // initial state (zero value): a top-level value may begin
	tokenArrayStart         // just consumed '[': a value or ']' may follow
	tokenArrayValue         // just consumed ',' inside an array: a value must follow
	tokenArrayComma         // just finished an array element: ',' or ']' may follow
	tokenObjectStart        // just consumed '{': a key string or '}' may follow
	tokenObjectKey          // just consumed ',' inside an object: a key string must follow
	tokenObjectColon        // just consumed an object key: ':' must follow
	tokenObjectValue        // just consumed ':': a value must follow
	tokenObjectComma        // just finished an object value: ',' or '}' may follow
)
    291 
    292 // advance tokenstate from a separator state to a value state
    293 func (dec *Decoder) tokenPrepareForDecode() error {
    294 	// Note: Not calling peek before switch, to avoid
    295 	// putting peek into the standard Decode path.
    296 	// peek is only called when using the Token API.
    297 	switch dec.tokenState {
    298 	case tokenArrayComma:
    299 		c, err := dec.peek()
    300 		if err != nil {
    301 			return err
    302 		}
    303 		if c != ',' {
    304 			return &SyntaxError{"expected comma after array element", 0}
    305 		}
    306 		dec.scanp++
    307 		dec.tokenState = tokenArrayValue
    308 	case tokenObjectColon:
    309 		c, err := dec.peek()
    310 		if err != nil {
    311 			return err
    312 		}
    313 		if c != ':' {
    314 			return &SyntaxError{"expected colon after object key", 0}
    315 		}
    316 		dec.scanp++
    317 		dec.tokenState = tokenObjectValue
    318 	}
    319 	return nil
    320 }
    321 
    322 func (dec *Decoder) tokenValueAllowed() bool {
    323 	switch dec.tokenState {
    324 	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
    325 		return true
    326 	}
    327 	return false
    328 }
    329 
    330 func (dec *Decoder) tokenValueEnd() {
    331 	switch dec.tokenState {
    332 	case tokenArrayStart, tokenArrayValue:
    333 		dec.tokenState = tokenArrayComma
    334 	case tokenObjectValue:
    335 		dec.tokenState = tokenObjectComma
    336 	}
    337 }
    338 
    339 // A Delim is a JSON array or object delimiter, one of [ ] { or }.
    340 type Delim rune
    341 
    342 func (d Delim) String() string {
    343 	return string(d)
    344 }
    345 
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed silently
	// before the token that follows them is returned.
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so it can be restored at the matching ']'.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop back to the enclosing state; the array as a whole then
			// counts as one finished value in that context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so it can be restored at the matching '}'.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop back to the enclosing state; the object as a whole then
			// counts as one finished value in that context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Colons are not reported; consume and look for the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Commas are not reported; consume and look for the next
			// element (array) or key (object).
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// Object key: decode it as a plain string. The state is
				// switched to tokenTopValue for the duration of the call so
				// that Decode accepts a value here, and restored afterwards
				// so a failure leaves the state machine where it was.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					clearOffset(err)
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// A string in value position is handled like any other value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode the scalar into an interface{}; Decode also advances
			// the token state past the value.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				clearOffset(err)
				return nil, err
			}
			return x, nil
		}
	}
}
    452 
    453 func clearOffset(err error) {
    454 	if s, ok := err.(*SyntaxError); ok {
    455 		s.Offset = 0
    456 	}
    457 }
    458 
    459 func (dec *Decoder) tokenError(c byte) (Token, error) {
    460 	var context string
    461 	switch dec.tokenState {
    462 	case tokenTopValue:
    463 		context = " looking for beginning of value"
    464 	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
    465 		context = " looking for beginning of value"
    466 	case tokenArrayComma:
    467 		context = " after array element"
    468 	case tokenObjectKey:
    469 		context = " looking for beginning of object key string"
    470 	case tokenObjectColon:
    471 		context = " after object key"
    472 	case tokenObjectComma:
    473 		context = " after object key:value pair"
    474 	}
    475 	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
    476 }
    477 
    478 // More reports whether there is another element in the
    479 // current array or object being parsed.
    480 func (dec *Decoder) More() bool {
    481 	c, err := dec.peek()
    482 	return err == nil && c != ']' && c != '}'
    483 }
    484 
    485 func (dec *Decoder) peek() (byte, error) {
    486 	var err error
    487 	for {
    488 		for i := dec.scanp; i < len(dec.buf); i++ {
    489 			c := dec.buf[i]
    490 			if isSpace(c) {
    491 				continue
    492 			}
    493 			dec.scanp = i
    494 			return c, nil
    495 		}
    496 		// buffer has been scanned, now report any error
    497 		if err != nil {
    498 			return 0, err
    499 		}
    500 		err = dec.refill()
    501 	}
    502 }
    503 
    504 /*
    505 TODO
    506 
    507 // EncodeToken writes the given JSON token to the stream.
    508 // It returns an error if the delimiters [ ] { } are not properly used.
    509 //
    510 // EncodeToken does not call Flush, because usually it is part of
    511 // a larger operation such as Encode, and those will call Flush when finished.
    512 // Callers that create an Encoder and then invoke EncodeToken directly,
    513 // without using Encode, need to call Flush when finished to ensure that
    514 // the JSON is written to the underlying writer.
    515 func (e *Encoder) EncodeToken(t Token) error  {
    516 	...
    517 }
    518 
    519 */
    520