Home | History | Annotate | Download | only in syntax
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package syntax
      6 
      7 import (
      8 	"fmt"
      9 	"os"
     10 	"testing"
     11 )
     12 
     13 func TestScanner(t *testing.T) {
     14 	if testing.Short() {
     15 		t.Skip("skipping test in short mode")
     16 	}
     17 
     18 	src, err := os.Open("parser.go")
     19 	if err != nil {
     20 		t.Fatal(err)
     21 	}
     22 	defer src.Close()
     23 
     24 	var s scanner
     25 	s.init(src, nil, nil)
     26 	for {
     27 		s.next()
     28 		if s.tok == _EOF {
     29 			break
     30 		}
     31 		switch s.tok {
     32 		case _Name:
     33 			fmt.Println(s.line, s.tok, "=>", s.lit)
     34 		case _Operator:
     35 			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
     36 		default:
     37 			fmt.Println(s.line, s.tok)
     38 		}
     39 	}
     40 }
     41 
     42 func TestTokens(t *testing.T) {
     43 	// make source
     44 	var buf []byte
     45 	for i, s := range sampleTokens {
     46 		buf = append(buf, "\t\t\t\t"[:i&3]...)     // leading indentation
     47 		buf = append(buf, s.src...)                // token
     48 		buf = append(buf, "        "[:i&7]...)     // trailing spaces
     49 		buf = append(buf, "/* foo */ // bar\n"...) // comments
     50 	}
     51 
     52 	// scan source
     53 	var got scanner
     54 	got.init(&bytesReader{buf}, nil, nil)
     55 	got.next()
     56 	for i, want := range sampleTokens {
     57 		nlsemi := false
     58 
     59 		if got.line != i+1 {
     60 			t.Errorf("got line %d; want %d", got.line, i+1)
     61 		}
     62 
     63 		if got.tok != want.tok {
     64 			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
     65 			continue
     66 		}
     67 
     68 		switch want.tok {
     69 		case _Name, _Literal:
     70 			if got.lit != want.src {
     71 				t.Errorf("got lit = %q; want %q", got.lit, want.src)
     72 				continue
     73 			}
     74 			nlsemi = true
     75 
     76 		case _Operator, _AssignOp, _IncOp:
     77 			if got.op != want.op {
     78 				t.Errorf("got op = %s; want %s", got.op, want.op)
     79 				continue
     80 			}
     81 			if got.prec != want.prec {
     82 				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
     83 				continue
     84 			}
     85 			nlsemi = want.tok == _IncOp
     86 
     87 		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
     88 			nlsemi = true
     89 		}
     90 
     91 		if nlsemi {
     92 			got.next()
     93 			if got.tok != _Semi {
     94 				t.Errorf("got tok = %s; want ;", got.tok)
     95 				continue
     96 			}
     97 		}
     98 
     99 		got.next()
    100 	}
    101 
    102 	if got.tok != _EOF {
    103 		t.Errorf("got %q; want _EOF", got.tok)
    104 	}
    105 }
    106 
    107 var sampleTokens = [...]struct {
    108 	tok  token
    109 	src  string
    110 	op   Operator
    111 	prec int
    112 }{
    113 	// name samples
    114 	{_Name, "x", 0, 0},
    115 	{_Name, "X123", 0, 0},
    116 	{_Name, "foo", 0, 0},
    117 	{_Name, "Foo123", 0, 0},
    118 	{_Name, "foo_bar", 0, 0},
    119 	{_Name, "_", 0, 0},
    120 	{_Name, "_foobar", 0, 0},
    121 	{_Name, "a", 0, 0},
    122 	{_Name, "foo", 0, 0},
    123 	{_Name, "bar", 0, 0},
    124 	{_Name, "", 0, 0},
    125 	{_Name, "foo", 0, 0},
    126 
    127 	// literal samples
    128 	{_Literal, "0", 0, 0},
    129 	{_Literal, "1", 0, 0},
    130 	{_Literal, "12345", 0, 0},
    131 	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
    132 	{_Literal, "01234567", 0, 0},
    133 	{_Literal, "0x0", 0, 0},
    134 	{_Literal, "0xcafebabe", 0, 0},
    135 	{_Literal, "0.", 0, 0},
    136 	{_Literal, "0.e0", 0, 0},
    137 	{_Literal, "0.e-1", 0, 0},
    138 	{_Literal, "0.e+123", 0, 0},
    139 	{_Literal, ".0", 0, 0},
    140 	{_Literal, ".0E00", 0, 0},
    141 	{_Literal, ".0E-0123", 0, 0},
    142 	{_Literal, ".0E+12345678901234567890", 0, 0},
    143 	{_Literal, ".45e1", 0, 0},
    144 	{_Literal, "3.14159265", 0, 0},
    145 	{_Literal, "1e0", 0, 0},
    146 	{_Literal, "1e+100", 0, 0},
    147 	{_Literal, "1e-100", 0, 0},
    148 	{_Literal, "2.71828e-1000", 0, 0},
    149 	{_Literal, "0i", 0, 0},
    150 	{_Literal, "1i", 0, 0},
    151 	{_Literal, "012345678901234567889i", 0, 0},
    152 	{_Literal, "123456789012345678890i", 0, 0},
    153 	{_Literal, "0.i", 0, 0},
    154 	{_Literal, ".0i", 0, 0},
    155 	{_Literal, "3.14159265i", 0, 0},
    156 	{_Literal, "1e0i", 0, 0},
    157 	{_Literal, "1e+100i", 0, 0},
    158 	{_Literal, "1e-100i", 0, 0},
    159 	{_Literal, "2.71828e-1000i", 0, 0},
    160 	{_Literal, "'a'", 0, 0},
    161 	{_Literal, "'\\000'", 0, 0},
    162 	{_Literal, "'\\xFF'", 0, 0},
    163 	{_Literal, "'\\uff16'", 0, 0},
    164 	{_Literal, "'\\U0000ff16'", 0, 0},
    165 	{_Literal, "`foobar`", 0, 0},
    166 	{_Literal, "`foo\tbar`", 0, 0},
    167 	{_Literal, "`\r`", 0, 0},
    168 
    169 	// operators
    170 	{_Operator, "||", OrOr, precOrOr},
    171 
    172 	{_Operator, "&&", AndAnd, precAndAnd},
    173 
    174 	{_Operator, "==", Eql, precCmp},
    175 	{_Operator, "!=", Neq, precCmp},
    176 	{_Operator, "<", Lss, precCmp},
    177 	{_Operator, "<=", Leq, precCmp},
    178 	{_Operator, ">", Gtr, precCmp},
    179 	{_Operator, ">=", Geq, precCmp},
    180 
    181 	{_Operator, "+", Add, precAdd},
    182 	{_Operator, "-", Sub, precAdd},
    183 	{_Operator, "|", Or, precAdd},
    184 	{_Operator, "^", Xor, precAdd},
    185 
    186 	{_Star, "*", Mul, precMul},
    187 	{_Operator, "/", Div, precMul},
    188 	{_Operator, "%", Rem, precMul},
    189 	{_Operator, "&", And, precMul},
    190 	{_Operator, "&^", AndNot, precMul},
    191 	{_Operator, "<<", Shl, precMul},
    192 	{_Operator, ">>", Shr, precMul},
    193 
    194 	// assignment operations
    195 	{_AssignOp, "+=", Add, precAdd},
    196 	{_AssignOp, "-=", Sub, precAdd},
    197 	{_AssignOp, "|=", Or, precAdd},
    198 	{_AssignOp, "^=", Xor, precAdd},
    199 
    200 	{_AssignOp, "*=", Mul, precMul},
    201 	{_AssignOp, "/=", Div, precMul},
    202 	{_AssignOp, "%=", Rem, precMul},
    203 	{_AssignOp, "&=", And, precMul},
    204 	{_AssignOp, "&^=", AndNot, precMul},
    205 	{_AssignOp, "<<=", Shl, precMul},
    206 	{_AssignOp, ">>=", Shr, precMul},
    207 
    208 	// other operations
    209 	{_IncOp, "++", Add, precAdd},
    210 	{_IncOp, "--", Sub, precAdd},
    211 	{_Assign, "=", 0, 0},
    212 	{_Define, ":=", 0, 0},
    213 	{_Arrow, "<-", 0, 0},
    214 
    215 	// delimiters
    216 	{_Lparen, "(", 0, 0},
    217 	{_Lbrack, "[", 0, 0},
    218 	{_Lbrace, "{", 0, 0},
    219 	{_Rparen, ")", 0, 0},
    220 	{_Rbrack, "]", 0, 0},
    221 	{_Rbrace, "}", 0, 0},
    222 	{_Comma, ",", 0, 0},
    223 	{_Semi, ";", 0, 0},
    224 	{_Colon, ":", 0, 0},
    225 	{_Dot, ".", 0, 0},
    226 	{_DotDotDot, "...", 0, 0},
    227 
    228 	// keywords
    229 	{_Break, "break", 0, 0},
    230 	{_Case, "case", 0, 0},
    231 	{_Chan, "chan", 0, 0},
    232 	{_Const, "const", 0, 0},
    233 	{_Continue, "continue", 0, 0},
    234 	{_Default, "default", 0, 0},
    235 	{_Defer, "defer", 0, 0},
    236 	{_Else, "else", 0, 0},
    237 	{_Fallthrough, "fallthrough", 0, 0},
    238 	{_For, "for", 0, 0},
    239 	{_Func, "func", 0, 0},
    240 	{_Go, "go", 0, 0},
    241 	{_Goto, "goto", 0, 0},
    242 	{_If, "if", 0, 0},
    243 	{_Import, "import", 0, 0},
    244 	{_Interface, "interface", 0, 0},
    245 	{_Map, "map", 0, 0},
    246 	{_Package, "package", 0, 0},
    247 	{_Range, "range", 0, 0},
    248 	{_Return, "return", 0, 0},
    249 	{_Select, "select", 0, 0},
    250 	{_Struct, "struct", 0, 0},
    251 	{_Switch, "switch", 0, 0},
    252 	{_Type, "type", 0, 0},
    253 	{_Var, "var", 0, 0},
    254 }
    255 
    256 func TestScanErrors(t *testing.T) {
    257 	for _, test := range []struct {
    258 		src, msg  string
    259 		pos, line int
    260 	}{
    261 		// Note: Positions for lexical errors are the earliest position
    262 		// where the error is apparent, not the beginning of the respective
    263 		// token.
    264 
    265 		// rune-level errors
    266 		{"fo\x00o", "invalid NUL character", 2, 1},
    267 		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 4, 2},
    268 		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 5, 3},
    269 
    270 		// token-level errors
    271 		{"x + ~y", "bitwise complement operator is ^", 4, 1},
    272 		{"foo$bar = 0", "illegal character U+0024 '$'", 3, 1},
    273 		{"const x = 0xyz", "malformed hex constant", 12, 1},
    274 		{"0123456789", "malformed octal constant", 10, 1},
    275 		{"0123456789. /* foobar", "comment not terminated", 12, 1},   // valid float constant
    276 		{"0123456789e0 /*\nfoobar", "comment not terminated", 13, 1}, // valid float constant
    277 		{"var a, b = 08, 07\n", "malformed octal constant", 13, 1},
    278 		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 10, 1},
    279 
    280 		{`''`, "empty character literal or unescaped ' in character literal", 1, 1},
    281 		{"'\n", "newline in character literal", 1, 1},
    282 		{`'\`, "missing '", 2, 1},
    283 		{`'\'`, "missing '", 3, 1},
    284 		{`'\x`, "missing '", 3, 1},
    285 		{`'\x'`, "non-hex character in escape sequence: '", 3, 1},
    286 		{`'\y'`, "unknown escape sequence", 2, 1},
    287 		{`'\x0'`, "non-hex character in escape sequence: '", 4, 1},
    288 		{`'\00'`, "non-octal character in escape sequence: '", 4, 1},
    289 		{`'\377' /*`, "comment not terminated", 7, 1}, // valid octal escape
    290 		{`'\378`, "non-octal character in escape sequence: 8", 4, 1},
    291 		{`'\400'`, "octal escape value > 255: 256", 5, 1},
    292 		{`'xx`, "missing '", 2, 1},
    293 
    294 		{"\"\n", "newline in string", 1, 1},
    295 		{`"`, "string not terminated", 0, 1},
    296 		{`"foo`, "string not terminated", 0, 1},
    297 		{"`", "string not terminated", 0, 1},
    298 		{"`foo", "string not terminated", 0, 1},
    299 		{"/*/", "comment not terminated", 0, 1},
    300 		{"/*\n\nfoo", "comment not terminated", 0, 1},
    301 		{"/*\n\nfoo", "comment not terminated", 0, 1},
    302 		{`"\`, "string not terminated", 0, 1},
    303 		{`"\"`, "string not terminated", 0, 1},
    304 		{`"\x`, "string not terminated", 0, 1},
    305 		{`"\x"`, "non-hex character in escape sequence: \"", 3, 1},
    306 		{`"\y"`, "unknown escape sequence", 2, 1},
    307 		{`"\x0"`, "non-hex character in escape sequence: \"", 4, 1},
    308 		{`"\00"`, "non-octal character in escape sequence: \"", 4, 1},
    309 		{`"\377" /*`, "comment not terminated", 7, 1}, // valid octal escape
    310 		{`"\378"`, "non-octal character in escape sequence: 8", 4, 1},
    311 		{`"\400"`, "octal escape value > 255: 256", 5, 1},
    312 
    313 		{`s := "foo\z"`, "unknown escape sequence", 10, 1},
    314 		{`s := "foo\z00\nbar"`, "unknown escape sequence", 10, 1},
    315 		{`"\x`, "string not terminated", 0, 1},
    316 		{`"\x"`, "non-hex character in escape sequence: \"", 3, 1},
    317 		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 18, 1},
    318 		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 18, 1},
    319 
    320 		// former problem cases
    321 		{"package p\n\n\xef", "invalid UTF-8 encoding", 11, 3},
    322 	} {
    323 		var s scanner
    324 		nerrors := 0
    325 		s.init(&bytesReader{[]byte(test.src)}, func(err error) {
    326 			nerrors++
    327 			// only check the first error
    328 			e := err.(Error) // we know it's an Error
    329 			if nerrors == 1 {
    330 				if e.Msg != test.msg {
    331 					t.Errorf("%q: got msg = %q; want %q", test.src, e.Msg, test.msg)
    332 				}
    333 				if e.Pos != test.pos {
    334 					t.Errorf("%q: got pos = %d; want %d", test.src, e.Pos, test.pos)
    335 				}
    336 				if e.Line != test.line {
    337 					t.Errorf("%q: got line = %d; want %d", test.src, e.Line, test.line)
    338 				}
    339 			} else if nerrors > 1 {
    340 				t.Errorf("%q: got unexpected %q at pos = %d, line = %d", test.src, e.Msg, e.Pos, e.Line)
    341 			}
    342 		}, nil)
    343 
    344 		for {
    345 			s.next()
    346 			if s.tok == _EOF {
    347 				break
    348 			}
    349 		}
    350 
    351 		if nerrors == 0 {
    352 			t.Errorf("%q: got no error; want %q", test.src, test.msg)
    353 		}
    354 	}
    355 }
    356