Home | History | Annotate | Download | only in json
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package json
      6 
      7 import (
      8 	"bytes"
      9 	"errors"
     10 	"io"
     11 )
     12 
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // input stream that refill reads from
	buf     []byte      // buffered input; bytes before scanp have already been consumed
	d       decodeState // unmarshaling state, re-initialized by every Decode call
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // scanner used by readValue to find the end of the next value
	err     error       // sticky read/scan error; once set, Decode keeps returning it

	tokenState int   // current state of the Token API (one of the token* constants)
	tokenStack []int // states saved by Token on '[' or '{' and restored on ']' or '}'
}
     26 
     27 // NewDecoder returns a new decoder that reads from r.
     28 //
     29 // The decoder introduces its own buffering and may
     30 // read data from r beyond the JSON values requested.
     31 func NewDecoder(r io.Reader) *Decoder {
     32 	return &Decoder{r: r}
     33 }
     34 
     35 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
     36 // Number instead of as a float64.
     37 func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
     38 
     39 // DisallowUnknownFields causes the Decoder to return an error when the destination
     40 // is a struct and the input contains object keys which do not match any
     41 // non-ignored, exported fields in the destination.
     42 func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
     43 
     44 // Decode reads the next JSON-encoded value from its
     45 // input and stores it in the value pointed to by v.
     46 //
     47 // See the documentation for Unmarshal for details about
     48 // the conversion of JSON into a Go value.
     49 func (dec *Decoder) Decode(v interface{}) error {
     50 	if dec.err != nil {
     51 		return dec.err
     52 	}
     53 
     54 	if err := dec.tokenPrepareForDecode(); err != nil {
     55 		return err
     56 	}
     57 
     58 	if !dec.tokenValueAllowed() {
     59 		return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()}
     60 	}
     61 
     62 	// Read whole value into buffer.
     63 	n, err := dec.readValue()
     64 	if err != nil {
     65 		return err
     66 	}
     67 	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
     68 	dec.scanp += n
     69 
     70 	// Don't save err from unmarshal into dec.err:
     71 	// the connection is still usable since we read a complete JSON
     72 	// object from it before the error happened.
     73 	err = dec.d.unmarshal(v)
     74 
     75 	// fixup token streaming state
     76 	dec.tokenValueEnd()
     77 
     78 	return err
     79 }
     80 
     81 // Buffered returns a reader of the data remaining in the Decoder's
     82 // buffer. The reader is valid until the next call to Decode.
     83 func (dec *Decoder) Buffered() io.Reader {
     84 	return bytes.NewReader(dec.buf[dec.scanp:])
     85 }
     86 
     87 // readValue reads a JSON value into dec.buf.
     88 // It returns the length of the encoding.
     89 func (dec *Decoder) readValue() (int, error) {
     90 	dec.scan.reset()
     91 
     92 	scanp := dec.scanp
     93 	var err error
     94 Input:
     95 	for {
     96 		// Look in the buffer for a new value.
     97 		for i, c := range dec.buf[scanp:] {
     98 			dec.scan.bytes++
     99 			v := dec.scan.step(&dec.scan, c)
    100 			if v == scanEnd {
    101 				scanp += i
    102 				break Input
    103 			}
    104 			// scanEnd is delayed one byte.
    105 			// We might block trying to get that byte from src,
    106 			// so instead invent a space byte.
    107 			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
    108 				scanp += i + 1
    109 				break Input
    110 			}
    111 			if v == scanError {
    112 				dec.err = dec.scan.err
    113 				return 0, dec.scan.err
    114 			}
    115 		}
    116 		scanp = len(dec.buf)
    117 
    118 		// Did the last read have an error?
    119 		// Delayed until now to allow buffer scan.
    120 		if err != nil {
    121 			if err == io.EOF {
    122 				if dec.scan.step(&dec.scan, ' ') == scanEnd {
    123 					break Input
    124 				}
    125 				if nonSpace(dec.buf) {
    126 					err = io.ErrUnexpectedEOF
    127 				}
    128 			}
    129 			dec.err = err
    130 			return 0, err
    131 		}
    132 
    133 		n := scanp - dec.scanp
    134 		err = dec.refill()
    135 		scanp = dec.scanp + n
    136 	}
    137 	return scanp - dec.scanp, nil
    138 }
    139 
    140 func (dec *Decoder) refill() error {
    141 	// Make room to read more into the buffer.
    142 	// First slide down data already consumed.
    143 	if dec.scanp > 0 {
    144 		dec.scanned += int64(dec.scanp)
    145 		n := copy(dec.buf, dec.buf[dec.scanp:])
    146 		dec.buf = dec.buf[:n]
    147 		dec.scanp = 0
    148 	}
    149 
    150 	// Grow buffer if not large enough.
    151 	const minRead = 512
    152 	if cap(dec.buf)-len(dec.buf) < minRead {
    153 		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
    154 		copy(newBuf, dec.buf)
    155 		dec.buf = newBuf
    156 	}
    157 
    158 	// Read. Delay error for next iteration (after scan).
    159 	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
    160 	dec.buf = dec.buf[0 : len(dec.buf)+n]
    161 
    162 	return err
    163 }
    164 
    165 func nonSpace(b []byte) bool {
    166 	for _, c := range b {
    167 		if !isSpace(c) {
    168 			return true
    169 		}
    170 	}
    171 	return false
    172 }
    173 
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination that Encode writes to
	err        error     // sticky error from a failed write; returned by later Encode calls
	escapeHTML bool      // passed to marshal as encOpts.escapeHTML; NewEncoder sets it to true

	indentBuf    *bytes.Buffer // scratch buffer for indented output, allocated on first use
	indentPrefix string        // prefix argument passed to Indent
	indentValue  string        // indent argument passed to Indent
}
    184 
    185 // NewEncoder returns a new encoder that writes to w.
    186 func NewEncoder(w io.Writer) *Encoder {
    187 	return &Encoder{w: w, escapeHTML: true}
    188 }
    189 
    190 // Encode writes the JSON encoding of v to the stream,
    191 // followed by a newline character.
    192 //
    193 // See the documentation for Marshal for details about the
    194 // conversion of Go values to JSON.
    195 func (enc *Encoder) Encode(v interface{}) error {
    196 	if enc.err != nil {
    197 		return enc.err
    198 	}
    199 	e := newEncodeState()
    200 	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
    201 	if err != nil {
    202 		return err
    203 	}
    204 
    205 	// Terminate each value with a newline.
    206 	// This makes the output look a little nicer
    207 	// when debugging, and some kind of space
    208 	// is required if the encoded value was a number,
    209 	// so that the reader knows there aren't more
    210 	// digits coming.
    211 	e.WriteByte('\n')
    212 
    213 	b := e.Bytes()
    214 	if enc.indentPrefix != "" || enc.indentValue != "" {
    215 		if enc.indentBuf == nil {
    216 			enc.indentBuf = new(bytes.Buffer)
    217 		}
    218 		enc.indentBuf.Reset()
    219 		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
    220 		if err != nil {
    221 			return err
    222 		}
    223 		b = enc.indentBuf.Bytes()
    224 	}
    225 	if _, err = enc.w.Write(b); err != nil {
    226 		enc.err = err
    227 	}
    228 	encodeStatePool.Put(e)
    229 	return err
    230 }
    231 
    232 // SetIndent instructs the encoder to format each subsequent encoded
    233 // value as if indented by the package-level function Indent(dst, src, prefix, indent).
    234 // Calling SetIndent("", "") disables indentation.
    235 func (enc *Encoder) SetIndent(prefix, indent string) {
    236 	enc.indentPrefix = prefix
    237 	enc.indentValue = indent
    238 }
    239 
    240 // SetEscapeHTML specifies whether problematic HTML characters
    241 // should be escaped inside JSON quoted strings.
    242 // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
    243 // to avoid certain safety problems that can arise when embedding JSON in HTML.
    244 //
    245 // In non-HTML settings where the escaping interferes with the readability
    246 // of the output, SetEscapeHTML(false) disables this behavior.
    247 func (enc *Encoder) SetEscapeHTML(on bool) {
    248 	enc.escapeHTML = on
    249 }
    250 
    251 // RawMessage is a raw encoded JSON value.
    252 // It implements Marshaler and Unmarshaler and can
    253 // be used to delay JSON decoding or precompute a JSON encoding.
    254 type RawMessage []byte
    255 
    256 // MarshalJSON returns m as the JSON encoding of m.
    257 func (m RawMessage) MarshalJSON() ([]byte, error) {
    258 	if m == nil {
    259 		return []byte("null"), nil
    260 	}
    261 	return m, nil
    262 }
    263 
    264 // UnmarshalJSON sets *m to a copy of data.
    265 func (m *RawMessage) UnmarshalJSON(data []byte) error {
    266 	if m == nil {
    267 		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
    268 	}
    269 	*m = append((*m)[0:0], data...)
    270 	return nil
    271 }
    272 
// Compile-time assertions that *RawMessage satisfies the Marshaler and
// Unmarshaler interfaces.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
    275 
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when UseNumber has been called on the Decoder)
//	string, for JSON string literals
//	nil, for JSON null
//
type Token interface{}
    286 
// States of the Token API, stored in Decoder.tokenState.
const (
	tokenTopValue    = iota // zero value, so the state of a freshly created Decoder
	tokenArrayStart         // set by Token after it consumes '['
	tokenArrayValue         // set after the ',' that follows an array element is consumed
	tokenArrayComma         // set by tokenValueEnd after an array element has been read
	tokenObjectStart        // set by Token after it consumes '{'
	tokenObjectKey          // set after the ',' that follows an object value is consumed
	tokenObjectColon        // set by Token after it returns an object key
	tokenObjectValue        // set after the ':' that follows an object key is consumed
	tokenObjectComma        // set by tokenValueEnd after an object value has been read
)
    298 
    299 // advance tokenstate from a separator state to a value state
    300 func (dec *Decoder) tokenPrepareForDecode() error {
    301 	// Note: Not calling peek before switch, to avoid
    302 	// putting peek into the standard Decode path.
    303 	// peek is only called when using the Token API.
    304 	switch dec.tokenState {
    305 	case tokenArrayComma:
    306 		c, err := dec.peek()
    307 		if err != nil {
    308 			return err
    309 		}
    310 		if c != ',' {
    311 			return &SyntaxError{"expected comma after array element", dec.offset()}
    312 		}
    313 		dec.scanp++
    314 		dec.tokenState = tokenArrayValue
    315 	case tokenObjectColon:
    316 		c, err := dec.peek()
    317 		if err != nil {
    318 			return err
    319 		}
    320 		if c != ':' {
    321 			return &SyntaxError{"expected colon after object key", dec.offset()}
    322 		}
    323 		dec.scanp++
    324 		dec.tokenState = tokenObjectValue
    325 	}
    326 	return nil
    327 }
    328 
    329 func (dec *Decoder) tokenValueAllowed() bool {
    330 	switch dec.tokenState {
    331 	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
    332 		return true
    333 	}
    334 	return false
    335 }
    336 
    337 func (dec *Decoder) tokenValueEnd() {
    338 	switch dec.tokenState {
    339 	case tokenArrayStart, tokenArrayValue:
    340 		dec.tokenState = tokenArrayComma
    341 	case tokenObjectValue:
    342 		dec.tokenState = tokenObjectComma
    343 	}
    344 }
    345 
    346 // A Delim is a JSON array or object delimiter, one of [ ] { or }.
    347 type Delim rune
    348 
    349 func (d Delim) String() string {
    350 	return string(d)
    351 }
    352 
    353 // Token returns the next JSON token in the input stream.
    354 // At the end of the input stream, Token returns nil, io.EOF.
    355 //
    356 // Token guarantees that the delimiters [ ] { } it returns are
    357 // properly nested and matched: if Token encounters an unexpected
    358 // delimiter in the input, it will return an error.
    359 //
    360 // The input stream consists of basic JSON valuesbool, string,
    361 // number, and nullalong with delimiters [ ] { } of type Delim
    362 // to mark the start and end of arrays and objects.
    363 // Commas and colons are elided.
    364 func (dec *Decoder) Token() (Token, error) {
    365 	for {
    366 		c, err := dec.peek()
    367 		if err != nil {
    368 			return nil, err
    369 		}
    370 		switch c {
    371 		case '[':
    372 			if !dec.tokenValueAllowed() {
    373 				return dec.tokenError(c)
    374 			}
    375 			dec.scanp++
    376 			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
    377 			dec.tokenState = tokenArrayStart
    378 			return Delim('['), nil
    379 
    380 		case ']':
    381 			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
    382 				return dec.tokenError(c)
    383 			}
    384 			dec.scanp++
    385 			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
    386 			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
    387 			dec.tokenValueEnd()
    388 			return Delim(']'), nil
    389 
    390 		case '{':
    391 			if !dec.tokenValueAllowed() {
    392 				return dec.tokenError(c)
    393 			}
    394 			dec.scanp++
    395 			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
    396 			dec.tokenState = tokenObjectStart
    397 			return Delim('{'), nil
    398 
    399 		case '}':
    400 			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
    401 				return dec.tokenError(c)
    402 			}
    403 			dec.scanp++
    404 			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
    405 			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
    406 			dec.tokenValueEnd()
    407 			return Delim('}'), nil
    408 
    409 		case ':':
    410 			if dec.tokenState != tokenObjectColon {
    411 				return dec.tokenError(c)
    412 			}
    413 			dec.scanp++
    414 			dec.tokenState = tokenObjectValue
    415 			continue
    416 
    417 		case ',':
    418 			if dec.tokenState == tokenArrayComma {
    419 				dec.scanp++
    420 				dec.tokenState = tokenArrayValue
    421 				continue
    422 			}
    423 			if dec.tokenState == tokenObjectComma {
    424 				dec.scanp++
    425 				dec.tokenState = tokenObjectKey
    426 				continue
    427 			}
    428 			return dec.tokenError(c)
    429 
    430 		case '"':
    431 			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
    432 				var x string
    433 				old := dec.tokenState
    434 				dec.tokenState = tokenTopValue
    435 				err := dec.Decode(&x)
    436 				dec.tokenState = old
    437 				if err != nil {
    438 					return nil, err
    439 				}
    440 				dec.tokenState = tokenObjectColon
    441 				return x, nil
    442 			}
    443 			fallthrough
    444 
    445 		default:
    446 			if !dec.tokenValueAllowed() {
    447 				return dec.tokenError(c)
    448 			}
    449 			var x interface{}
    450 			if err := dec.Decode(&x); err != nil {
    451 				return nil, err
    452 			}
    453 			return x, nil
    454 		}
    455 	}
    456 }
    457 
    458 func (dec *Decoder) tokenError(c byte) (Token, error) {
    459 	var context string
    460 	switch dec.tokenState {
    461 	case tokenTopValue:
    462 		context = " looking for beginning of value"
    463 	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
    464 		context = " looking for beginning of value"
    465 	case tokenArrayComma:
    466 		context = " after array element"
    467 	case tokenObjectKey:
    468 		context = " looking for beginning of object key string"
    469 	case tokenObjectColon:
    470 		context = " after object key"
    471 	case tokenObjectComma:
    472 		context = " after object key:value pair"
    473 	}
    474 	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, dec.offset()}
    475 }
    476 
    477 // More reports whether there is another element in the
    478 // current array or object being parsed.
    479 func (dec *Decoder) More() bool {
    480 	c, err := dec.peek()
    481 	return err == nil && c != ']' && c != '}'
    482 }
    483 
    484 func (dec *Decoder) peek() (byte, error) {
    485 	var err error
    486 	for {
    487 		for i := dec.scanp; i < len(dec.buf); i++ {
    488 			c := dec.buf[i]
    489 			if isSpace(c) {
    490 				continue
    491 			}
    492 			dec.scanp = i
    493 			return c, nil
    494 		}
    495 		// buffer has been scanned, now report any error
    496 		if err != nil {
    497 			return 0, err
    498 		}
    499 		err = dec.refill()
    500 	}
    501 }
    502 
    503 func (dec *Decoder) offset() int64 {
    504 	return dec.scanned + int64(dec.scanp)
    505 }
    506