Home | History | Annotate | Download | only in json
      1 // Copyright 2010 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package json
      6 
      7 import (
      8 	"bytes"
      9 	"errors"
     10 	"io"
     11 )
     12 
// A Decoder reads and decodes JSON objects from an input stream.
//
// Besides the read buffer it also tracks a small state machine
// (tokenState/tokenStack) used by the Token API to validate the
// nesting of arrays and objects.
type Decoder struct {
	r     io.Reader   // underlying input; refill reads from it
	buf   []byte      // buffered input; buf[scanp:] has not been consumed yet
	d     decodeState // decoding state that Decode re-initializes for each value
	scanp int         // start of unread data in buf
	scan  scanner     // scanner used by readValue to find the end of a value
	err   error       // sticky error; once set, Decode returns it immediately

	tokenState int   // current state of the Token state machine (a token* constant)
	tokenStack []int // tokenState values saved for each enclosing array or object
}
     25 
     26 // NewDecoder returns a new decoder that reads from r.
     27 //
     28 // The decoder introduces its own buffering and may
     29 // read data from r beyond the JSON values requested.
     30 func NewDecoder(r io.Reader) *Decoder {
     31 	return &Decoder{r: r}
     32 }
     33 
     34 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
     35 // Number instead of as a float64.
     36 func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
     37 
     38 // Decode reads the next JSON-encoded value from its
     39 // input and stores it in the value pointed to by v.
     40 //
     41 // See the documentation for Unmarshal for details about
     42 // the conversion of JSON into a Go value.
     43 func (dec *Decoder) Decode(v interface{}) error {
     44 	if dec.err != nil {
     45 		return dec.err
     46 	}
     47 
     48 	if err := dec.tokenPrepareForDecode(); err != nil {
     49 		return err
     50 	}
     51 
     52 	if !dec.tokenValueAllowed() {
     53 		return &SyntaxError{msg: "not at beginning of value"}
     54 	}
     55 
     56 	// Read whole value into buffer.
     57 	n, err := dec.readValue()
     58 	if err != nil {
     59 		return err
     60 	}
     61 	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
     62 	dec.scanp += n
     63 
     64 	// Don't save err from unmarshal into dec.err:
     65 	// the connection is still usable since we read a complete JSON
     66 	// object from it before the error happened.
     67 	err = dec.d.unmarshal(v)
     68 
     69 	// fixup token streaming state
     70 	dec.tokenValueEnd()
     71 
     72 	return err
     73 }
     74 
     75 // Buffered returns a reader of the data remaining in the Decoder's
     76 // buffer. The reader is valid until the next call to Decode.
     77 func (dec *Decoder) Buffered() io.Reader {
     78 	return bytes.NewReader(dec.buf[dec.scanp:])
     79 }
     80 
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value starts at dec.buf[dec.scanp]; dec.scanp itself is not
// advanced here (the caller does that). The scanner is fed one byte
// at a time and the buffer is refilled from the reader whenever the
// buffered bytes run out before the value is complete.
//
// Any error returned is also stored in dec.err.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor into dec.buf; dec.scanp stays at the
	// start of the value.
	scanp := dec.scanp
	var err error
Input:
	for {
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, int(c))
			if v == scanEnd {
				// The byte at offset i is not counted as part of the value.
				scanp += i
				break Input
			}
			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				// The closing bracket at offset i belongs to the value.
				scanp += i + 1
				break Input
			}
			if v == scanError {
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// Everything buffered so far has been scanned.
		scanp = len(dec.buf)

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed a space so that a value which is only terminated
				// by the byte following it can still end at EOF.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Non-space data was buffered but no complete value was
				// found: report a truncated input rather than a clean EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the unread data to the front of the buffer
		// and reset dec.scanp, so remember the cursor as an offset
		// relative to dec.scanp and rebuild it afterwards.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
    133 
    134 func (dec *Decoder) refill() error {
    135 	// Make room to read more into the buffer.
    136 	// First slide down data already consumed.
    137 	if dec.scanp > 0 {
    138 		n := copy(dec.buf, dec.buf[dec.scanp:])
    139 		dec.buf = dec.buf[:n]
    140 		dec.scanp = 0
    141 	}
    142 
    143 	// Grow buffer if not large enough.
    144 	const minRead = 512
    145 	if cap(dec.buf)-len(dec.buf) < minRead {
    146 		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
    147 		copy(newBuf, dec.buf)
    148 		dec.buf = newBuf
    149 	}
    150 
    151 	// Read.  Delay error for next iteration (after scan).
    152 	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
    153 	dec.buf = dec.buf[0 : len(dec.buf)+n]
    154 
    155 	return err
    156 }
    157 
    158 func nonSpace(b []byte) bool {
    159 	for _, c := range b {
    160 		if !isSpace(rune(c)) {
    161 			return true
    162 		}
    163 	}
    164 	return false
    165 }
    166 
// An Encoder writes JSON objects to an output stream.
type Encoder struct {
	w   io.Writer // destination for the encoded output
	err error     // sticky error from a failed Write; once set, Encode returns it
}
    172 
    173 // NewEncoder returns a new encoder that writes to w.
    174 func NewEncoder(w io.Writer) *Encoder {
    175 	return &Encoder{w: w}
    176 }
    177 
    178 // Encode writes the JSON encoding of v to the stream,
    179 // followed by a newline character.
    180 //
    181 // See the documentation for Marshal for details about the
    182 // conversion of Go values to JSON.
    183 func (enc *Encoder) Encode(v interface{}) error {
    184 	if enc.err != nil {
    185 		return enc.err
    186 	}
    187 	e := newEncodeState()
    188 	err := e.marshal(v)
    189 	if err != nil {
    190 		return err
    191 	}
    192 
    193 	// Terminate each value with a newline.
    194 	// This makes the output look a little nicer
    195 	// when debugging, and some kind of space
    196 	// is required if the encoded value was a number,
    197 	// so that the reader knows there aren't more
    198 	// digits coming.
    199 	e.WriteByte('\n')
    200 
    201 	if _, err = enc.w.Write(e.Bytes()); err != nil {
    202 		enc.err = err
    203 	}
    204 	encodeStatePool.Put(e)
    205 	return err
    206 }
    207 
    208 // RawMessage is a raw encoded JSON object.
    209 // It implements Marshaler and Unmarshaler and can
    210 // be used to delay JSON decoding or precompute a JSON encoding.
    211 type RawMessage []byte
    212 
    213 // MarshalJSON returns *m as the JSON encoding of m.
    214 func (m *RawMessage) MarshalJSON() ([]byte, error) {
    215 	return *m, nil
    216 }
    217 
    218 // UnmarshalJSON sets *m to a copy of data.
    219 func (m *RawMessage) UnmarshalJSON(data []byte) error {
    220 	if m == nil {
    221 		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
    222 	}
    223 	*m = append((*m)[0:0], data...)
    224 	return nil
    225 }
    226 
// Compile-time checks that *RawMessage implements Marshaler and Unmarshaler.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
    229 
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when UseNumber has been called)
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
    240 
// States of the Token state machine, stored in Decoder.tokenState.
const (
	// tokenTopValue: not inside any array or object; a value may start here.
	// It is the zero value and therefore the initial state.
	tokenTopValue = iota
	// tokenArrayStart: just after '['; a value or ']' may follow.
	tokenArrayStart
	// tokenArrayValue: just after ',' inside an array; a value must follow.
	tokenArrayValue
	// tokenArrayComma: just after an array element; ',' or ']' may follow.
	tokenArrayComma
	// tokenObjectStart: just after '{'; a key string or '}' may follow.
	tokenObjectStart
	// tokenObjectKey: just after ',' inside an object; a key string must follow.
	tokenObjectKey
	// tokenObjectColon: just after an object key; ':' must follow.
	tokenObjectColon
	// tokenObjectValue: just after ':'; a value must follow.
	tokenObjectValue
	// tokenObjectComma: just after an object value; ',' or '}' may follow.
	tokenObjectComma
)
    252 
    253 // advance tokenstate from a separator state to a value state
    254 func (dec *Decoder) tokenPrepareForDecode() error {
    255 	// Note: Not calling peek before switch, to avoid
    256 	// putting peek into the standard Decode path.
    257 	// peek is only called when using the Token API.
    258 	switch dec.tokenState {
    259 	case tokenArrayComma:
    260 		c, err := dec.peek()
    261 		if err != nil {
    262 			return err
    263 		}
    264 		if c != ',' {
    265 			return &SyntaxError{"expected comma after array element", 0}
    266 		}
    267 		dec.scanp++
    268 		dec.tokenState = tokenArrayValue
    269 	case tokenObjectColon:
    270 		c, err := dec.peek()
    271 		if err != nil {
    272 			return err
    273 		}
    274 		if c != ':' {
    275 			return &SyntaxError{"expected colon after object key", 0}
    276 		}
    277 		dec.scanp++
    278 		dec.tokenState = tokenObjectValue
    279 	}
    280 	return nil
    281 }
    282 
    283 func (dec *Decoder) tokenValueAllowed() bool {
    284 	switch dec.tokenState {
    285 	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
    286 		return true
    287 	}
    288 	return false
    289 }
    290 
    291 func (dec *Decoder) tokenValueEnd() {
    292 	switch dec.tokenState {
    293 	case tokenArrayStart, tokenArrayValue:
    294 		dec.tokenState = tokenArrayComma
    295 	case tokenObjectValue:
    296 		dec.tokenState = tokenObjectComma
    297 	}
    298 }
    299 
    300 // A Delim is a JSON array or object delimiter, one of [ ] { or }.
    301 type Delim rune
    302 
    303 func (d Delim) String() string {
    304 	return string(d)
    305 }
    306 
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed silently
	// before the next real token is returned.
	for {
		// peek skips white space and reports the next byte without
		// consuming it.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Remember the enclosing state so that the matching ']'
			// can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Only valid directly after '[' or after an element;
			// not after a comma.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state, then treat the finished array
			// as a completed value within it.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Only valid directly after '{' or after a key:value pair;
			// not after a comma.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Consume the colon and go around again for the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			// A string where an object key is expected: decode it as a
			// plain string and move on to expecting the colon.
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				var x string
				// Decode refuses to run in a key state, so pretend to be
				// at top level for the duration of the call.
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					clearOffset(err)
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Otherwise the string is an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Any other value is decoded generically; Decode also
			// updates the token state via tokenValueEnd.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				clearOffset(err)
				return nil, err
			}
			return x, nil
		}
	}
}
    413 
    414 func clearOffset(err error) {
    415 	if s, ok := err.(*SyntaxError); ok {
    416 		s.Offset = 0
    417 	}
    418 }
    419 
    420 func (dec *Decoder) tokenError(c byte) (Token, error) {
    421 	var context string
    422 	switch dec.tokenState {
    423 	case tokenTopValue:
    424 		context = " looking for beginning of value"
    425 	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
    426 		context = " looking for beginning of value"
    427 	case tokenArrayComma:
    428 		context = " after array element"
    429 	case tokenObjectKey:
    430 		context = " looking for beginning of object key string"
    431 	case tokenObjectColon:
    432 		context = " after object key"
    433 	case tokenObjectComma:
    434 		context = " after object key:value pair"
    435 	}
    436 	return nil, &SyntaxError{"invalid character " + quoteChar(int(c)) + " " + context, 0}
    437 }
    438 
    439 // More reports whether there is another element in the
    440 // current array or object being parsed.
    441 func (dec *Decoder) More() bool {
    442 	c, err := dec.peek()
    443 	return err == nil && c != ']' && c != '}'
    444 }
    445 
    446 func (dec *Decoder) peek() (byte, error) {
    447 	var err error
    448 	for {
    449 		for i := dec.scanp; i < len(dec.buf); i++ {
    450 			c := dec.buf[i]
    451 			if isSpace(rune(c)) {
    452 				continue
    453 			}
    454 			dec.scanp = i
    455 			return c, nil
    456 		}
    457 		// buffer has been scanned, now report any error
    458 		if err != nil {
    459 			return 0, err
    460 		}
    461 		err = dec.refill()
    462 	}
    463 }
    464 
    465 /*
    466 TODO
    467 
    468 // EncodeToken writes the given JSON token to the stream.
    469 // It returns an error if the delimiters [ ] { } are not properly used.
    470 //
    471 // EncodeToken does not call Flush, because usually it is part of
    472 // a larger operation such as Encode, and those will call Flush when finished.
    473 // Callers that create an Encoder and then invoke EncodeToken directly,
    474 // without using Encode, need to call Flush when finished to ensure that
    475 // the JSON is written to the underlying writer.
    476 func (e *Encoder) EncodeToken(t Token) error  {
    477 	...
    478 }
    479 
    480 */
    481