Home | History | Annotate | Download | only in scanner
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package scanner implements a scanner for Go source text.
      6 // It takes a []byte as source which can then be tokenized
      7 // through repeated calls to the Scan method.
      8 //
      9 package scanner
     10 
     11 import (
     12 	"bytes"
     13 	"fmt"
     14 	"go/token"
     15 	"path/filepath"
     16 	"strconv"
     17 	"unicode"
     18 	"unicode/utf8"
     19 )
     20 
     21 // An ErrorHandler may be provided to Scanner.Init. If a syntax error is
     22 // encountered and a handler was installed, the handler is called with a
     23 // position and an error message. The position points to the beginning of
     24 // the offending token.
     25 //
     26 type ErrorHandler func(pos token.Position, msg string)
     27 
     28 // A Scanner holds the scanner's internal state while processing
     29 // a given text. It can be allocated as part of another data
     30 // structure but must be initialized via Init before use.
     31 //
     32 type Scanner struct {
     33 	// immutable state
     34 	file *token.File  // source file handle
     35 	dir  string       // directory portion of file.Name()
     36 	src  []byte       // source
     37 	err  ErrorHandler // error reporting; or nil
     38 	mode Mode         // scanning mode
     39 
     40 	// scanning state
     41 	ch         rune // current character
     42 	offset     int  // character offset
     43 	rdOffset   int  // reading offset (position after current character)
     44 	lineOffset int  // current line offset
     45 	insertSemi bool // insert a semicolon before next newline
     46 
     47 	// public state - ok to modify
     48 	ErrorCount int // number of errors encountered
     49 }
     50 
     51 const bom = 0xFEFF // byte order mark, only permitted as very first character
     52 
     53 // Read the next Unicode char into s.ch.
     54 // s.ch < 0 means end-of-file.
     55 //
     56 func (s *Scanner) next() {
     57 	if s.rdOffset < len(s.src) {
     58 		s.offset = s.rdOffset
     59 		if s.ch == '\n' {
     60 			s.lineOffset = s.offset
     61 			s.file.AddLine(s.offset)
     62 		}
     63 		r, w := rune(s.src[s.rdOffset]), 1
     64 		switch {
     65 		case r == 0:
     66 			s.error(s.offset, "illegal character NUL")
     67 		case r >= utf8.RuneSelf:
     68 			// not ASCII
     69 			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
     70 			if r == utf8.RuneError && w == 1 {
     71 				s.error(s.offset, "illegal UTF-8 encoding")
     72 			} else if r == bom && s.offset > 0 {
     73 				s.error(s.offset, "illegal byte order mark")
     74 			}
     75 		}
     76 		s.rdOffset += w
     77 		s.ch = r
     78 	} else {
     79 		s.offset = len(s.src)
     80 		if s.ch == '\n' {
     81 			s.lineOffset = s.offset
     82 			s.file.AddLine(s.offset)
     83 		}
     84 		s.ch = -1 // eof
     85 	}
     86 }
     87 
     88 // A mode value is a set of flags (or 0).
     89 // They control scanner behavior.
     90 //
     91 type Mode uint
     92 
     93 const (
     94 	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
     95 	dontInsertSemis                  // do not automatically insert semicolons - for testing only
     96 )
     97 
     98 // Init prepares the scanner s to tokenize the text src by setting the
     99 // scanner at the beginning of src. The scanner uses the file set file
    100 // for position information and it adds line information for each line.
    101 // It is ok to re-use the same file when re-scanning the same file as
    102 // line information which is already present is ignored. Init causes a
    103 // panic if the file size does not match the src size.
    104 //
    105 // Calls to Scan will invoke the error handler err if they encounter a
    106 // syntax error and err is not nil. Also, for each error encountered,
    107 // the Scanner field ErrorCount is incremented by one. The mode parameter
    108 // determines how comments are handled.
    109 //
    110 // Note that Init may call err if there is an error in the first character
    111 // of the file.
    112 //
    113 func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
    114 	// Explicitly initialize all fields since a scanner may be reused.
    115 	if file.Size() != len(src) {
    116 		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
    117 	}
    118 	s.file = file
    119 	s.dir, _ = filepath.Split(file.Name())
    120 	s.src = src
    121 	s.err = err
    122 	s.mode = mode
    123 
    124 	s.ch = ' '
    125 	s.offset = 0
    126 	s.rdOffset = 0
    127 	s.lineOffset = 0
    128 	s.insertSemi = false
    129 	s.ErrorCount = 0
    130 
    131 	s.next()
    132 	if s.ch == bom {
    133 		s.next() // ignore BOM at file beginning
    134 	}
    135 }
    136 
    137 func (s *Scanner) error(offs int, msg string) {
    138 	if s.err != nil {
    139 		s.err(s.file.Position(s.file.Pos(offs)), msg)
    140 	}
    141 	s.ErrorCount++
    142 }
    143 
    144 var prefix = []byte("//line ")
    145 
    146 func (s *Scanner) interpretLineComment(text []byte) {
    147 	if bytes.HasPrefix(text, prefix) {
    148 		// get filename and line number, if any
    149 		if i := bytes.LastIndex(text, []byte{':'}); i > 0 {
    150 			if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
    151 				// valid //line filename:line comment
    152 				filename := string(bytes.TrimSpace(text[len(prefix):i]))
    153 				if filename != "" {
    154 					filename = filepath.Clean(filename)
    155 					if !filepath.IsAbs(filename) {
    156 						// make filename relative to current directory
    157 						filename = filepath.Join(s.dir, filename)
    158 					}
    159 				}
    160 				// update scanner position
    161 				s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
    162 			}
    163 		}
    164 	}
    165 }
    166 
    167 func (s *Scanner) scanComment() string {
    168 	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
    169 	offs := s.offset - 1 // position of initial '/'
    170 	hasCR := false
    171 
    172 	if s.ch == '/' {
    173 		//-style comment
    174 		s.next()
    175 		for s.ch != '\n' && s.ch >= 0 {
    176 			if s.ch == '\r' {
    177 				hasCR = true
    178 			}
    179 			s.next()
    180 		}
    181 		if offs == s.lineOffset {
    182 			// comment starts at the beginning of the current line
    183 			s.interpretLineComment(s.src[offs:s.offset])
    184 		}
    185 		goto exit
    186 	}
    187 
    188 	/*-style comment */
    189 	s.next()
    190 	for s.ch >= 0 {
    191 		ch := s.ch
    192 		if ch == '\r' {
    193 			hasCR = true
    194 		}
    195 		s.next()
    196 		if ch == '*' && s.ch == '/' {
    197 			s.next()
    198 			goto exit
    199 		}
    200 	}
    201 
    202 	s.error(offs, "comment not terminated")
    203 
    204 exit:
    205 	lit := s.src[offs:s.offset]
    206 	if hasCR {
    207 		lit = stripCR(lit)
    208 	}
    209 
    210 	return string(lit)
    211 }
    212 
    213 func (s *Scanner) findLineEnd() bool {
    214 	// initial '/' already consumed
    215 
    216 	defer func(offs int) {
    217 		// reset scanner state to where it was upon calling findLineEnd
    218 		s.ch = '/'
    219 		s.offset = offs
    220 		s.rdOffset = offs + 1
    221 		s.next() // consume initial '/' again
    222 	}(s.offset - 1)
    223 
    224 	// read ahead until a newline, EOF, or non-comment token is found
    225 	for s.ch == '/' || s.ch == '*' {
    226 		if s.ch == '/' {
    227 			//-style comment always contains a newline
    228 			return true
    229 		}
    230 		/*-style comment: look for newline */
    231 		s.next()
    232 		for s.ch >= 0 {
    233 			ch := s.ch
    234 			if ch == '\n' {
    235 				return true
    236 			}
    237 			s.next()
    238 			if ch == '*' && s.ch == '/' {
    239 				s.next()
    240 				break
    241 			}
    242 		}
    243 		s.skipWhitespace() // s.insertSemi is set
    244 		if s.ch < 0 || s.ch == '\n' {
    245 			return true
    246 		}
    247 		if s.ch != '/' {
    248 			// non-comment token
    249 			return false
    250 		}
    251 		s.next() // consume '/'
    252 	}
    253 
    254 	return false
    255 }
    256 
    257 func isLetter(ch rune) bool {
    258 	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
    259 }
    260 
    261 func isDigit(ch rune) bool {
    262 	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
    263 }
    264 
    265 func (s *Scanner) scanIdentifier() string {
    266 	offs := s.offset
    267 	for isLetter(s.ch) || isDigit(s.ch) {
    268 		s.next()
    269 	}
    270 	return string(s.src[offs:s.offset])
    271 }
    272 
    273 func digitVal(ch rune) int {
    274 	switch {
    275 	case '0' <= ch && ch <= '9':
    276 		return int(ch - '0')
    277 	case 'a' <= ch && ch <= 'f':
    278 		return int(ch - 'a' + 10)
    279 	case 'A' <= ch && ch <= 'F':
    280 		return int(ch - 'A' + 10)
    281 	}
    282 	return 16 // larger than any legal digit val
    283 }
    284 
    285 func (s *Scanner) scanMantissa(base int) {
    286 	for digitVal(s.ch) < base {
    287 		s.next()
    288 	}
    289 }
    290 
    291 func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
    292 	// digitVal(s.ch) < 10
    293 	offs := s.offset
    294 	tok := token.INT
    295 
    296 	if seenDecimalPoint {
    297 		offs--
    298 		tok = token.FLOAT
    299 		s.scanMantissa(10)
    300 		goto exponent
    301 	}
    302 
    303 	if s.ch == '0' {
    304 		// int or float
    305 		offs := s.offset
    306 		s.next()
    307 		if s.ch == 'x' || s.ch == 'X' {
    308 			// hexadecimal int
    309 			s.next()
    310 			s.scanMantissa(16)
    311 			if s.offset-offs <= 2 {
    312 				// only scanned "0x" or "0X"
    313 				s.error(offs, "illegal hexadecimal number")
    314 			}
    315 		} else {
    316 			// octal int or float
    317 			seenDecimalDigit := false
    318 			s.scanMantissa(8)
    319 			if s.ch == '8' || s.ch == '9' {
    320 				// illegal octal int or float
    321 				seenDecimalDigit = true
    322 				s.scanMantissa(10)
    323 			}
    324 			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
    325 				goto fraction
    326 			}
    327 			// octal int
    328 			if seenDecimalDigit {
    329 				s.error(offs, "illegal octal number")
    330 			}
    331 		}
    332 		goto exit
    333 	}
    334 
    335 	// decimal int or float
    336 	s.scanMantissa(10)
    337 
    338 fraction:
    339 	if s.ch == '.' {
    340 		tok = token.FLOAT
    341 		s.next()
    342 		s.scanMantissa(10)
    343 	}
    344 
    345 exponent:
    346 	if s.ch == 'e' || s.ch == 'E' {
    347 		tok = token.FLOAT
    348 		s.next()
    349 		if s.ch == '-' || s.ch == '+' {
    350 			s.next()
    351 		}
    352 		if digitVal(s.ch) < 10 {
    353 			s.scanMantissa(10)
    354 		} else {
    355 			s.error(offs, "illegal floating-point exponent")
    356 		}
    357 	}
    358 
    359 	if s.ch == 'i' {
    360 		tok = token.IMAG
    361 		s.next()
    362 	}
    363 
    364 exit:
    365 	return tok, string(s.src[offs:s.offset])
    366 }
    367 
    368 // scanEscape parses an escape sequence where rune is the accepted
    369 // escaped quote. In case of a syntax error, it stops at the offending
    370 // character (without consuming it) and returns false. Otherwise
    371 // it returns true.
    372 func (s *Scanner) scanEscape(quote rune) bool {
    373 	offs := s.offset
    374 
    375 	var n int
    376 	var base, max uint32
    377 	switch s.ch {
    378 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
    379 		s.next()
    380 		return true
    381 	case '0', '1', '2', '3', '4', '5', '6', '7':
    382 		n, base, max = 3, 8, 255
    383 	case 'x':
    384 		s.next()
    385 		n, base, max = 2, 16, 255
    386 	case 'u':
    387 		s.next()
    388 		n, base, max = 4, 16, unicode.MaxRune
    389 	case 'U':
    390 		s.next()
    391 		n, base, max = 8, 16, unicode.MaxRune
    392 	default:
    393 		msg := "unknown escape sequence"
    394 		if s.ch < 0 {
    395 			msg = "escape sequence not terminated"
    396 		}
    397 		s.error(offs, msg)
    398 		return false
    399 	}
    400 
    401 	var x uint32
    402 	for n > 0 {
    403 		d := uint32(digitVal(s.ch))
    404 		if d >= base {
    405 			msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
    406 			if s.ch < 0 {
    407 				msg = "escape sequence not terminated"
    408 			}
    409 			s.error(s.offset, msg)
    410 			return false
    411 		}
    412 		x = x*base + d
    413 		s.next()
    414 		n--
    415 	}
    416 
    417 	if x > max || 0xD800 <= x && x < 0xE000 {
    418 		s.error(offs, "escape sequence is invalid Unicode code point")
    419 		return false
    420 	}
    421 
    422 	return true
    423 }
    424 
    425 func (s *Scanner) scanRune() string {
    426 	// '\'' opening already consumed
    427 	offs := s.offset - 1
    428 
    429 	valid := true
    430 	n := 0
    431 	for {
    432 		ch := s.ch
    433 		if ch == '\n' || ch < 0 {
    434 			// only report error if we don't have one already
    435 			if valid {
    436 				s.error(offs, "rune literal not terminated")
    437 				valid = false
    438 			}
    439 			break
    440 		}
    441 		s.next()
    442 		if ch == '\'' {
    443 			break
    444 		}
    445 		n++
    446 		if ch == '\\' {
    447 			if !s.scanEscape('\'') {
    448 				valid = false
    449 			}
    450 			// continue to read to closing quote
    451 		}
    452 	}
    453 
    454 	if valid && n != 1 {
    455 		s.error(offs, "illegal rune literal")
    456 	}
    457 
    458 	return string(s.src[offs:s.offset])
    459 }
    460 
    461 func (s *Scanner) scanString() string {
    462 	// '"' opening already consumed
    463 	offs := s.offset - 1
    464 
    465 	for {
    466 		ch := s.ch
    467 		if ch == '\n' || ch < 0 {
    468 			s.error(offs, "string literal not terminated")
    469 			break
    470 		}
    471 		s.next()
    472 		if ch == '"' {
    473 			break
    474 		}
    475 		if ch == '\\' {
    476 			s.scanEscape('"')
    477 		}
    478 	}
    479 
    480 	return string(s.src[offs:s.offset])
    481 }
    482 
    483 func stripCR(b []byte) []byte {
    484 	c := make([]byte, len(b))
    485 	i := 0
    486 	for _, ch := range b {
    487 		if ch != '\r' {
    488 			c[i] = ch
    489 			i++
    490 		}
    491 	}
    492 	return c[:i]
    493 }
    494 
    495 func (s *Scanner) scanRawString() string {
    496 	// '`' opening already consumed
    497 	offs := s.offset - 1
    498 
    499 	hasCR := false
    500 	for {
    501 		ch := s.ch
    502 		if ch < 0 {
    503 			s.error(offs, "raw string literal not terminated")
    504 			break
    505 		}
    506 		s.next()
    507 		if ch == '`' {
    508 			break
    509 		}
    510 		if ch == '\r' {
    511 			hasCR = true
    512 		}
    513 	}
    514 
    515 	lit := s.src[offs:s.offset]
    516 	if hasCR {
    517 		lit = stripCR(lit)
    518 	}
    519 
    520 	return string(lit)
    521 }
    522 
    523 func (s *Scanner) skipWhitespace() {
    524 	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
    525 		s.next()
    526 	}
    527 }
    528 
    529 // Helper functions for scanning multi-byte tokens such as >> += >>= .
    530 // Different routines recognize different length tok_i based on matches
    531 // of ch_i. If a token ends in '=', the result is tok1 or tok3
    532 // respectively. Otherwise, the result is tok0 if there was no other
    533 // matching character, or tok2 if the matching character was ch2.
    534 
    535 func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
    536 	if s.ch == '=' {
    537 		s.next()
    538 		return tok1
    539 	}
    540 	return tok0
    541 }
    542 
    543 func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
    544 	if s.ch == '=' {
    545 		s.next()
    546 		return tok1
    547 	}
    548 	if s.ch == ch2 {
    549 		s.next()
    550 		return tok2
    551 	}
    552 	return tok0
    553 }
    554 
    555 func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
    556 	if s.ch == '=' {
    557 		s.next()
    558 		return tok1
    559 	}
    560 	if s.ch == ch2 {
    561 		s.next()
    562 		if s.ch == '=' {
    563 			s.next()
    564 			return tok3
    565 		}
    566 		return tok2
    567 	}
    568 	return tok0
    569 }
    570 
    571 // Scan scans the next token and returns the token position, the token,
    572 // and its literal string if applicable. The source end is indicated by
    573 // token.EOF.
    574 //
    575 // If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
    576 // token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
    577 // has the corresponding value.
    578 //
    579 // If the returned token is a keyword, the literal string is the keyword.
    580 //
    581 // If the returned token is token.SEMICOLON, the corresponding
    582 // literal string is ";" if the semicolon was present in the source,
    583 // and "\n" if the semicolon was inserted because of a newline or
    584 // at EOF.
    585 //
    586 // If the returned token is token.ILLEGAL, the literal string is the
    587 // offending character.
    588 //
    589 // In all other cases, Scan returns an empty literal string.
    590 //
    591 // For more tolerant parsing, Scan will return a valid token if
    592 // possible even if a syntax error was encountered. Thus, even
    593 // if the resulting token sequence contains no illegal tokens,
    594 // a client may not assume that no error occurred. Instead it
    595 // must check the scanner's ErrorCount or the number of calls
    596 // of the error handler, if there was one installed.
    597 //
    598 // Scan adds line information to the file added to the file
    599 // set with Init. Token positions are relative to that file
    600 // and thus relative to the file set.
    601 //
    602 func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
    603 scanAgain:
    604 	s.skipWhitespace()
    605 
    606 	// current token start
    607 	pos = s.file.Pos(s.offset)
    608 
    609 	// determine token value
    610 	insertSemi := false
    611 	switch ch := s.ch; {
    612 	case isLetter(ch):
    613 		lit = s.scanIdentifier()
    614 		if len(lit) > 1 {
    615 			// keywords are longer than one letter - avoid lookup otherwise
    616 			tok = token.Lookup(lit)
    617 			switch tok {
    618 			case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
    619 				insertSemi = true
    620 			}
    621 		} else {
    622 			insertSemi = true
    623 			tok = token.IDENT
    624 		}
    625 	case '0' <= ch && ch <= '9':
    626 		insertSemi = true
    627 		tok, lit = s.scanNumber(false)
    628 	default:
    629 		s.next() // always make progress
    630 		switch ch {
    631 		case -1:
    632 			if s.insertSemi {
    633 				s.insertSemi = false // EOF consumed
    634 				return pos, token.SEMICOLON, "\n"
    635 			}
    636 			tok = token.EOF
    637 		case '\n':
    638 			// we only reach here if s.insertSemi was
    639 			// set in the first place and exited early
    640 			// from s.skipWhitespace()
    641 			s.insertSemi = false // newline consumed
    642 			return pos, token.SEMICOLON, "\n"
    643 		case '"':
    644 			insertSemi = true
    645 			tok = token.STRING
    646 			lit = s.scanString()
    647 		case '\'':
    648 			insertSemi = true
    649 			tok = token.CHAR
    650 			lit = s.scanRune()
    651 		case '`':
    652 			insertSemi = true
    653 			tok = token.STRING
    654 			lit = s.scanRawString()
    655 		case ':':
    656 			tok = s.switch2(token.COLON, token.DEFINE)
    657 		case '.':
    658 			if '0' <= s.ch && s.ch <= '9' {
    659 				insertSemi = true
    660 				tok, lit = s.scanNumber(true)
    661 			} else if s.ch == '.' {
    662 				s.next()
    663 				if s.ch == '.' {
    664 					s.next()
    665 					tok = token.ELLIPSIS
    666 				}
    667 			} else {
    668 				tok = token.PERIOD
    669 			}
    670 		case ',':
    671 			tok = token.COMMA
    672 		case ';':
    673 			tok = token.SEMICOLON
    674 			lit = ";"
    675 		case '(':
    676 			tok = token.LPAREN
    677 		case ')':
    678 			insertSemi = true
    679 			tok = token.RPAREN
    680 		case '[':
    681 			tok = token.LBRACK
    682 		case ']':
    683 			insertSemi = true
    684 			tok = token.RBRACK
    685 		case '{':
    686 			tok = token.LBRACE
    687 		case '}':
    688 			insertSemi = true
    689 			tok = token.RBRACE
    690 		case '+':
    691 			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
    692 			if tok == token.INC {
    693 				insertSemi = true
    694 			}
    695 		case '-':
    696 			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
    697 			if tok == token.DEC {
    698 				insertSemi = true
    699 			}
    700 		case '*':
    701 			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
    702 		case '/':
    703 			if s.ch == '/' || s.ch == '*' {
    704 				// comment
    705 				if s.insertSemi && s.findLineEnd() {
    706 					// reset position to the beginning of the comment
    707 					s.ch = '/'
    708 					s.offset = s.file.Offset(pos)
    709 					s.rdOffset = s.offset + 1
    710 					s.insertSemi = false // newline consumed
    711 					return pos, token.SEMICOLON, "\n"
    712 				}
    713 				comment := s.scanComment()
    714 				if s.mode&ScanComments == 0 {
    715 					// skip comment
    716 					s.insertSemi = false // newline consumed
    717 					goto scanAgain
    718 				}
    719 				tok = token.COMMENT
    720 				lit = comment
    721 			} else {
    722 				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
    723 			}
    724 		case '%':
    725 			tok = s.switch2(token.REM, token.REM_ASSIGN)
    726 		case '^':
    727 			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
    728 		case '<':
    729 			if s.ch == '-' {
    730 				s.next()
    731 				tok = token.ARROW
    732 			} else {
    733 				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
    734 			}
    735 		case '>':
    736 			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
    737 		case '=':
    738 			tok = s.switch2(token.ASSIGN, token.EQL)
    739 		case '!':
    740 			tok = s.switch2(token.NOT, token.NEQ)
    741 		case '&':
    742 			if s.ch == '^' {
    743 				s.next()
    744 				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
    745 			} else {
    746 				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
    747 			}
    748 		case '|':
    749 			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
    750 		default:
    751 			// next reports unexpected BOMs - don't repeat
    752 			if ch != bom {
    753 				s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
    754 			}
    755 			insertSemi = s.insertSemi // preserve insertSemi info
    756 			tok = token.ILLEGAL
    757 			lit = string(ch)
    758 		}
    759 	}
    760 	if s.mode&dontInsertSemis == 0 {
    761 		s.insertSemi = insertSemi
    762 	}
    763 
    764 	return
    765 }
    766