1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 // JSON value parser state machine. 8 // Just about at the limit of what is reasonable to write by hand. 9 // Some parts are a bit tedious, but overall it nicely factors out the 10 // otherwise common code from the multiple scanning functions 11 // in this package (Compact, Indent, checkValid, nextValue, etc). 12 // 13 // This file starts with two simple examples using the scanner 14 // before diving into the scanner itself. 15 16 import "strconv" 17 18 // Valid reports whether data is a valid JSON encoding. 19 func Valid(data []byte) bool { 20 return checkValid(data, &scanner{}) == nil 21 } 22 23 // checkValid verifies that data is valid JSON-encoded data. 24 // scan is passed in for use by checkValid to avoid an allocation. 25 func checkValid(data []byte, scan *scanner) error { 26 scan.reset() 27 for _, c := range data { 28 scan.bytes++ 29 if scan.step(scan, c) == scanError { 30 return scan.err 31 } 32 } 33 if scan.eof() == scanError { 34 return scan.err 35 } 36 return nil 37 } 38 39 // nextValue splits data after the next whole JSON value, 40 // returning that value and the bytes that follow it as separate slices. 41 // scan is passed in for use by nextValue to avoid an allocation. 42 func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) { 43 scan.reset() 44 for i, c := range data { 45 v := scan.step(scan, c) 46 if v >= scanEndObject { 47 switch v { 48 // probe the scanner with a space to determine whether we will 49 // get scanEnd on the next character. Otherwise, if the next character 50 // is not a space, scanEndTop allocates a needless error. 51 case scanEndObject, scanEndArray: 52 if scan.step(scan, ' ') == scanEnd { 53 return data[:i+1], data[i+1:], nil 54 } 55 case scanError: 56 return nil, nil, scan.err 57 case scanEnd: 58 return data[:i], data[i:], nil 59 } 60 } 61 } 62 if scan.eof() == scanError { 63 return nil, nil, scan.err 64 } 65 return data, nil, nil 66 } 67 68 // A SyntaxError is a description of a JSON syntax error. 69 type SyntaxError struct { 70 msg string // description of error 71 Offset int64 // error occurred after reading Offset bytes 72 } 73 74 func (e *SyntaxError) Error() string { return e.msg } 75 76 // A scanner is a JSON scanning state machine. 77 // Callers call scan.reset() and then pass bytes in one at a time 78 // by calling scan.step(&scan, c) for each byte. 79 // The return value, referred to as an opcode, tells the 80 // caller about significant parsing events like beginning 81 // and ending literals, objects, and arrays, so that the 82 // caller can follow along if it wishes. 83 // The return value scanEnd indicates that a single top-level 84 // JSON value has been completed, *before* the byte that 85 // just got passed in. (The indication must be delayed in order 86 // to recognize the end of numbers: is 123 a whole value or 87 // the beginning of 12345e+6?). 88 type scanner struct { 89 // The step is a func to be called to execute the next transition. 90 // Also tried using an integer constant and a single func 91 // with a switch, but using the func directly was 10% faster 92 // on a 64-bit Mac Mini, and it's nicer to read. 93 step func(*scanner, byte) int 94 95 // Reached end of top-level value. 96 endTop bool 97 98 // Stack of what we're in the middle of - array values, object keys, object values. 99 parseState []int 100 101 // Error that happened, if any. 102 err error 103 104 // 1-byte redo (see undo method) 105 redo bool 106 redoCode int 107 redoState func(*scanner, byte) int 108 109 // total bytes consumed, updated by decoder.Decode 110 bytes int64 111 } 112 113 // These values are returned by the state transition functions 114 // assigned to scanner.state and the method scanner.eof. 115 // They give details about the current state of the scan that 116 // callers might be interested to know about. 117 // It is okay to ignore the return value of any particular 118 // call to scanner.state: if one call returns scanError, 119 // every subsequent call will return scanError too. 120 const ( 121 // Continue. 122 scanContinue = iota // uninteresting byte 123 scanBeginLiteral // end implied by next result != scanContinue 124 scanBeginObject // begin object 125 scanObjectKey // just finished object key (string) 126 scanObjectValue // just finished non-last object value 127 scanEndObject // end object (implies scanObjectValue if possible) 128 scanBeginArray // begin array 129 scanArrayValue // just finished array value 130 scanEndArray // end array (implies scanArrayValue if possible) 131 scanSkipSpace // space byte; can skip; known to be last "continue" result 132 133 // Stop. 134 scanEnd // top-level value ended *before* this byte; known to be first "stop" result 135 scanError // hit an error, scanner.err. 136 ) 137 138 // These values are stored in the parseState stack. 139 // They give the current state of a composite value 140 // being scanned. If the parser is inside a nested value 141 // the parseState describes the nested state, outermost at entry 0. 142 const ( 143 parseObjectKey = iota // parsing object key (before colon) 144 parseObjectValue // parsing object value (after colon) 145 parseArrayValue // parsing array value 146 ) 147 148 // reset prepares the scanner for use. 149 // It must be called before calling s.step. 150 func (s *scanner) reset() { 151 s.step = stateBeginValue 152 s.parseState = s.parseState[0:0] 153 s.err = nil 154 s.redo = false 155 s.endTop = false 156 } 157 158 // eof tells the scanner that the end of input has been reached. 159 // It returns a scan status just as s.step does. 160 func (s *scanner) eof() int { 161 if s.err != nil { 162 return scanError 163 } 164 if s.endTop { 165 return scanEnd 166 } 167 s.step(s, ' ') 168 if s.endTop { 169 return scanEnd 170 } 171 if s.err == nil { 172 s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} 173 } 174 return scanError 175 } 176 177 // pushParseState pushes a new parse state p onto the parse stack. 178 func (s *scanner) pushParseState(p int) { 179 s.parseState = append(s.parseState, p) 180 } 181 182 // popParseState pops a parse state (already obtained) off the stack 183 // and updates s.step accordingly. 184 func (s *scanner) popParseState() { 185 n := len(s.parseState) - 1 186 s.parseState = s.parseState[0:n] 187 s.redo = false 188 if n == 0 { 189 s.step = stateEndTop 190 s.endTop = true 191 } else { 192 s.step = stateEndValue 193 } 194 } 195 196 func isSpace(c byte) bool { 197 return c == ' ' || c == '\t' || c == '\r' || c == '\n' 198 } 199 200 // stateBeginValueOrEmpty is the state after reading `[`. 201 func stateBeginValueOrEmpty(s *scanner, c byte) int { 202 if c <= ' ' && isSpace(c) { 203 return scanSkipSpace 204 } 205 if c == ']' { 206 return stateEndValue(s, c) 207 } 208 return stateBeginValue(s, c) 209 } 210 211 // stateBeginValue is the state at the beginning of the input. 212 func stateBeginValue(s *scanner, c byte) int { 213 if c <= ' ' && isSpace(c) { 214 return scanSkipSpace 215 } 216 switch c { 217 case '{': 218 s.step = stateBeginStringOrEmpty 219 s.pushParseState(parseObjectKey) 220 return scanBeginObject 221 case '[': 222 s.step = stateBeginValueOrEmpty 223 s.pushParseState(parseArrayValue) 224 return scanBeginArray 225 case '"': 226 s.step = stateInString 227 return scanBeginLiteral 228 case '-': 229 s.step = stateNeg 230 return scanBeginLiteral 231 case '0': // beginning of 0.123 232 s.step = state0 233 return scanBeginLiteral 234 case 't': // beginning of true 235 s.step = stateT 236 return scanBeginLiteral 237 case 'f': // beginning of false 238 s.step = stateF 239 return scanBeginLiteral 240 case 'n': // beginning of null 241 s.step = stateN 242 return scanBeginLiteral 243 } 244 if '1' <= c && c <= '9' { // beginning of 1234.5 245 s.step = state1 246 return scanBeginLiteral 247 } 248 return s.error(c, "looking for beginning of value") 249 } 250 251 // stateBeginStringOrEmpty is the state after reading `{`. 252 func stateBeginStringOrEmpty(s *scanner, c byte) int { 253 if c <= ' ' && isSpace(c) { 254 return scanSkipSpace 255 } 256 if c == '}' { 257 n := len(s.parseState) 258 s.parseState[n-1] = parseObjectValue 259 return stateEndValue(s, c) 260 } 261 return stateBeginString(s, c) 262 } 263 264 // stateBeginString is the state after reading `{"key": value,`. 265 func stateBeginString(s *scanner, c byte) int { 266 if c <= ' ' && isSpace(c) { 267 return scanSkipSpace 268 } 269 if c == '"' { 270 s.step = stateInString 271 return scanBeginLiteral 272 } 273 return s.error(c, "looking for beginning of object key string") 274 } 275 276 // stateEndValue is the state after completing a value, 277 // such as after reading `{}` or `true` or `["x"`. 278 func stateEndValue(s *scanner, c byte) int { 279 n := len(s.parseState) 280 if n == 0 { 281 // Completed top-level before the current byte. 282 s.step = stateEndTop 283 s.endTop = true 284 return stateEndTop(s, c) 285 } 286 if c <= ' ' && isSpace(c) { 287 s.step = stateEndValue 288 return scanSkipSpace 289 } 290 ps := s.parseState[n-1] 291 switch ps { 292 case parseObjectKey: 293 if c == ':' { 294 s.parseState[n-1] = parseObjectValue 295 s.step = stateBeginValue 296 return scanObjectKey 297 } 298 return s.error(c, "after object key") 299 case parseObjectValue: 300 if c == ',' { 301 s.parseState[n-1] = parseObjectKey 302 s.step = stateBeginString 303 return scanObjectValue 304 } 305 if c == '}' { 306 s.popParseState() 307 return scanEndObject 308 } 309 return s.error(c, "after object key:value pair") 310 case parseArrayValue: 311 if c == ',' { 312 s.step = stateBeginValue 313 return scanArrayValue 314 } 315 if c == ']' { 316 s.popParseState() 317 return scanEndArray 318 } 319 return s.error(c, "after array element") 320 } 321 return s.error(c, "") 322 } 323 324 // stateEndTop is the state after finishing the top-level value, 325 // such as after reading `{}` or `[1,2,3]`. 326 // Only space characters should be seen now. 327 func stateEndTop(s *scanner, c byte) int { 328 if c != ' ' && c != '\t' && c != '\r' && c != '\n' { 329 // Complain about non-space byte on next call. 330 s.error(c, "after top-level value") 331 } 332 return scanEnd 333 } 334 335 // stateInString is the state after reading `"`. 336 func stateInString(s *scanner, c byte) int { 337 if c == '"' { 338 s.step = stateEndValue 339 return scanContinue 340 } 341 if c == '\\' { 342 s.step = stateInStringEsc 343 return scanContinue 344 } 345 if c < 0x20 { 346 return s.error(c, "in string literal") 347 } 348 return scanContinue 349 } 350 351 // stateInStringEsc is the state after reading `"\` during a quoted string. 352 func stateInStringEsc(s *scanner, c byte) int { 353 switch c { 354 case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': 355 s.step = stateInString 356 return scanContinue 357 case 'u': 358 s.step = stateInStringEscU 359 return scanContinue 360 } 361 return s.error(c, "in string escape code") 362 } 363 364 // stateInStringEscU is the state after reading `"\u` during a quoted string. 365 func stateInStringEscU(s *scanner, c byte) int { 366 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 367 s.step = stateInStringEscU1 368 return scanContinue 369 } 370 // numbers 371 return s.error(c, "in \\u hexadecimal character escape") 372 } 373 374 // stateInStringEscU1 is the state after reading `"\u1` during a quoted string. 375 func stateInStringEscU1(s *scanner, c byte) int { 376 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 377 s.step = stateInStringEscU12 378 return scanContinue 379 } 380 // numbers 381 return s.error(c, "in \\u hexadecimal character escape") 382 } 383 384 // stateInStringEscU12 is the state after reading `"\u12` during a quoted string. 385 func stateInStringEscU12(s *scanner, c byte) int { 386 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 387 s.step = stateInStringEscU123 388 return scanContinue 389 } 390 // numbers 391 return s.error(c, "in \\u hexadecimal character escape") 392 } 393 394 // stateInStringEscU123 is the state after reading `"\u123` during a quoted string. 395 func stateInStringEscU123(s *scanner, c byte) int { 396 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 397 s.step = stateInString 398 return scanContinue 399 } 400 // numbers 401 return s.error(c, "in \\u hexadecimal character escape") 402 } 403 404 // stateNeg is the state after reading `-` during a number. 405 func stateNeg(s *scanner, c byte) int { 406 if c == '0' { 407 s.step = state0 408 return scanContinue 409 } 410 if '1' <= c && c <= '9' { 411 s.step = state1 412 return scanContinue 413 } 414 return s.error(c, "in numeric literal") 415 } 416 417 // state1 is the state after reading a non-zero integer during a number, 418 // such as after reading `1` or `100` but not `0`. 419 func state1(s *scanner, c byte) int { 420 if '0' <= c && c <= '9' { 421 s.step = state1 422 return scanContinue 423 } 424 return state0(s, c) 425 } 426 427 // state0 is the state after reading `0` during a number. 428 func state0(s *scanner, c byte) int { 429 if c == '.' { 430 s.step = stateDot 431 return scanContinue 432 } 433 if c == 'e' || c == 'E' { 434 s.step = stateE 435 return scanContinue 436 } 437 return stateEndValue(s, c) 438 } 439 440 // stateDot is the state after reading the integer and decimal point in a number, 441 // such as after reading `1.`. 442 func stateDot(s *scanner, c byte) int { 443 if '0' <= c && c <= '9' { 444 s.step = stateDot0 445 return scanContinue 446 } 447 return s.error(c, "after decimal point in numeric literal") 448 } 449 450 // stateDot0 is the state after reading the integer, decimal point, and subsequent 451 // digits of a number, such as after reading `3.14`. 452 func stateDot0(s *scanner, c byte) int { 453 if '0' <= c && c <= '9' { 454 return scanContinue 455 } 456 if c == 'e' || c == 'E' { 457 s.step = stateE 458 return scanContinue 459 } 460 return stateEndValue(s, c) 461 } 462 463 // stateE is the state after reading the mantissa and e in a number, 464 // such as after reading `314e` or `0.314e`. 465 func stateE(s *scanner, c byte) int { 466 if c == '+' || c == '-' { 467 s.step = stateESign 468 return scanContinue 469 } 470 return stateESign(s, c) 471 } 472 473 // stateESign is the state after reading the mantissa, e, and sign in a number, 474 // such as after reading `314e-` or `0.314e+`. 475 func stateESign(s *scanner, c byte) int { 476 if '0' <= c && c <= '9' { 477 s.step = stateE0 478 return scanContinue 479 } 480 return s.error(c, "in exponent of numeric literal") 481 } 482 483 // stateE0 is the state after reading the mantissa, e, optional sign, 484 // and at least one digit of the exponent in a number, 485 // such as after reading `314e-2` or `0.314e+1` or `3.14e0`. 486 func stateE0(s *scanner, c byte) int { 487 if '0' <= c && c <= '9' { 488 return scanContinue 489 } 490 return stateEndValue(s, c) 491 } 492 493 // stateT is the state after reading `t`. 494 func stateT(s *scanner, c byte) int { 495 if c == 'r' { 496 s.step = stateTr 497 return scanContinue 498 } 499 return s.error(c, "in literal true (expecting 'r')") 500 } 501 502 // stateTr is the state after reading `tr`. 503 func stateTr(s *scanner, c byte) int { 504 if c == 'u' { 505 s.step = stateTru 506 return scanContinue 507 } 508 return s.error(c, "in literal true (expecting 'u')") 509 } 510 511 // stateTru is the state after reading `tru`. 512 func stateTru(s *scanner, c byte) int { 513 if c == 'e' { 514 s.step = stateEndValue 515 return scanContinue 516 } 517 return s.error(c, "in literal true (expecting 'e')") 518 } 519 520 // stateF is the state after reading `f`. 521 func stateF(s *scanner, c byte) int { 522 if c == 'a' { 523 s.step = stateFa 524 return scanContinue 525 } 526 return s.error(c, "in literal false (expecting 'a')") 527 } 528 529 // stateFa is the state after reading `fa`. 530 func stateFa(s *scanner, c byte) int { 531 if c == 'l' { 532 s.step = stateFal 533 return scanContinue 534 } 535 return s.error(c, "in literal false (expecting 'l')") 536 } 537 538 // stateFal is the state after reading `fal`. 539 func stateFal(s *scanner, c byte) int { 540 if c == 's' { 541 s.step = stateFals 542 return scanContinue 543 } 544 return s.error(c, "in literal false (expecting 's')") 545 } 546 547 // stateFals is the state after reading `fals`. 548 func stateFals(s *scanner, c byte) int { 549 if c == 'e' { 550 s.step = stateEndValue 551 return scanContinue 552 } 553 return s.error(c, "in literal false (expecting 'e')") 554 } 555 556 // stateN is the state after reading `n`. 557 func stateN(s *scanner, c byte) int { 558 if c == 'u' { 559 s.step = stateNu 560 return scanContinue 561 } 562 return s.error(c, "in literal null (expecting 'u')") 563 } 564 565 // stateNu is the state after reading `nu`. 566 func stateNu(s *scanner, c byte) int { 567 if c == 'l' { 568 s.step = stateNul 569 return scanContinue 570 } 571 return s.error(c, "in literal null (expecting 'l')") 572 } 573 574 // stateNul is the state after reading `nul`. 575 func stateNul(s *scanner, c byte) int { 576 if c == 'l' { 577 s.step = stateEndValue 578 return scanContinue 579 } 580 return s.error(c, "in literal null (expecting 'l')") 581 } 582 583 // stateError is the state after reaching a syntax error, 584 // such as after reading `[1}` or `5.1.2`. 585 func stateError(s *scanner, c byte) int { 586 return scanError 587 } 588 589 // error records an error and switches to the error state. 590 func (s *scanner) error(c byte, context string) int { 591 s.step = stateError 592 s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} 593 return scanError 594 } 595 596 // quoteChar formats c as a quoted character literal 597 func quoteChar(c byte) string { 598 // special cases - different from quoted strings 599 if c == '\'' { 600 return `'\''` 601 } 602 if c == '"' { 603 return `'"'` 604 } 605 606 // use quoted string with different quotation marks 607 s := strconv.Quote(string(c)) 608 return "'" + s[1:len(s)-1] + "'" 609 } 610 611 // undo causes the scanner to return scanCode from the next state transition. 612 // This gives callers a simple 1-byte undo mechanism. 613 func (s *scanner) undo(scanCode int) { 614 if s.redo { 615 panic("json: invalid use of scanner") 616 } 617 s.redoCode = scanCode 618 s.redoState = s.step 619 s.step = stateRedo 620 s.redo = true 621 } 622 623 // stateRedo helps implement the scanner's 1-byte undo. 624 func stateRedo(s *scanner, c byte) int { 625 s.redo = false 626 s.step = s.redoState 627 return s.redoCode 628 } 629