Home | History | Annotate | Download | only in syntax
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package syntax
      6 
      7 import (
      8 	"fmt"
      9 	"os"
     10 	"strings"
     11 	"testing"
     12 )
     13 
     14 func TestScanner(t *testing.T) {
     15 	if testing.Short() {
     16 		t.Skip("skipping test in short mode")
     17 	}
     18 
     19 	src, err := os.Open("parser.go")
     20 	if err != nil {
     21 		t.Fatal(err)
     22 	}
     23 	defer src.Close()
     24 
     25 	var s scanner
     26 	s.init(src, nil, nil)
     27 	for {
     28 		s.next()
     29 		if s.tok == _EOF {
     30 			break
     31 		}
     32 		switch s.tok {
     33 		case _Name:
     34 			fmt.Println(s.line, s.tok, "=>", s.lit)
     35 		case _Operator:
     36 			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
     37 		default:
     38 			fmt.Println(s.line, s.tok)
     39 		}
     40 	}
     41 }
     42 
     43 func TestTokens(t *testing.T) {
     44 	// make source
     45 	var buf []byte
     46 	for i, s := range sampleTokens {
     47 		buf = append(buf, "\t\t\t\t"[:i&3]...)     // leading indentation
     48 		buf = append(buf, s.src...)                // token
     49 		buf = append(buf, "        "[:i&7]...)     // trailing spaces
     50 		buf = append(buf, "/* foo */ // bar\n"...) // comments
     51 	}
     52 
     53 	// scan source
     54 	var got scanner
     55 	got.init(&bytesReader{buf}, nil, nil)
     56 	got.next()
     57 	for i, want := range sampleTokens {
     58 		nlsemi := false
     59 
     60 		if got.line != uint(i+linebase) {
     61 			t.Errorf("got line %d; want %d", got.line, i+linebase)
     62 		}
     63 
     64 		if got.tok != want.tok {
     65 			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
     66 			continue
     67 		}
     68 
     69 		switch want.tok {
     70 		case _Semi:
     71 			if got.lit != "semicolon" {
     72 				t.Errorf("got %s; want semicolon", got.lit)
     73 			}
     74 
     75 		case _Name, _Literal:
     76 			if got.lit != want.src {
     77 				t.Errorf("got lit = %q; want %q", got.lit, want.src)
     78 				continue
     79 			}
     80 			nlsemi = true
     81 
     82 		case _Operator, _AssignOp, _IncOp:
     83 			if got.op != want.op {
     84 				t.Errorf("got op = %s; want %s", got.op, want.op)
     85 				continue
     86 			}
     87 			if got.prec != want.prec {
     88 				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
     89 				continue
     90 			}
     91 			nlsemi = want.tok == _IncOp
     92 
     93 		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
     94 			nlsemi = true
     95 		}
     96 
     97 		if nlsemi {
     98 			got.next()
     99 			if got.tok != _Semi {
    100 				t.Errorf("got tok = %s; want ;", got.tok)
    101 				continue
    102 			}
    103 			if got.lit != "newline" {
    104 				t.Errorf("got %s; want newline", got.lit)
    105 			}
    106 		}
    107 
    108 		got.next()
    109 	}
    110 
    111 	if got.tok != _EOF {
    112 		t.Errorf("got %q; want _EOF", got.tok)
    113 	}
    114 }
    115 
    116 var sampleTokens = [...]struct {
    117 	tok  token
    118 	src  string
    119 	op   Operator
    120 	prec int
    121 }{
    122 	// name samples
    123 	{_Name, "x", 0, 0},
    124 	{_Name, "X123", 0, 0},
    125 	{_Name, "foo", 0, 0},
    126 	{_Name, "Foo123", 0, 0},
    127 	{_Name, "foo_bar", 0, 0},
    128 	{_Name, "_", 0, 0},
    129 	{_Name, "_foobar", 0, 0},
    130 	{_Name, "a", 0, 0},
    131 	{_Name, "foo", 0, 0},
    132 	{_Name, "bar", 0, 0},
    133 	{_Name, "", 0, 0},
    134 	{_Name, "foo", 0, 0},
    135 
    136 	// literal samples
    137 	{_Literal, "0", 0, 0},
    138 	{_Literal, "1", 0, 0},
    139 	{_Literal, "12345", 0, 0},
    140 	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
    141 	{_Literal, "01234567", 0, 0},
    142 	{_Literal, "0x0", 0, 0},
    143 	{_Literal, "0xcafebabe", 0, 0},
    144 	{_Literal, "0.", 0, 0},
    145 	{_Literal, "0.e0", 0, 0},
    146 	{_Literal, "0.e-1", 0, 0},
    147 	{_Literal, "0.e+123", 0, 0},
    148 	{_Literal, ".0", 0, 0},
    149 	{_Literal, ".0E00", 0, 0},
    150 	{_Literal, ".0E-0123", 0, 0},
    151 	{_Literal, ".0E+12345678901234567890", 0, 0},
    152 	{_Literal, ".45e1", 0, 0},
    153 	{_Literal, "3.14159265", 0, 0},
    154 	{_Literal, "1e0", 0, 0},
    155 	{_Literal, "1e+100", 0, 0},
    156 	{_Literal, "1e-100", 0, 0},
    157 	{_Literal, "2.71828e-1000", 0, 0},
    158 	{_Literal, "0i", 0, 0},
    159 	{_Literal, "1i", 0, 0},
    160 	{_Literal, "012345678901234567889i", 0, 0},
    161 	{_Literal, "123456789012345678890i", 0, 0},
    162 	{_Literal, "0.i", 0, 0},
    163 	{_Literal, ".0i", 0, 0},
    164 	{_Literal, "3.14159265i", 0, 0},
    165 	{_Literal, "1e0i", 0, 0},
    166 	{_Literal, "1e+100i", 0, 0},
    167 	{_Literal, "1e-100i", 0, 0},
    168 	{_Literal, "2.71828e-1000i", 0, 0},
    169 	{_Literal, "'a'", 0, 0},
    170 	{_Literal, "'\\000'", 0, 0},
    171 	{_Literal, "'\\xFF'", 0, 0},
    172 	{_Literal, "'\\uff16'", 0, 0},
    173 	{_Literal, "'\\U0000ff16'", 0, 0},
    174 	{_Literal, "`foobar`", 0, 0},
    175 	{_Literal, "`foo\tbar`", 0, 0},
    176 	{_Literal, "`\r`", 0, 0},
    177 
    178 	// operators
    179 	{_Operator, "||", OrOr, precOrOr},
    180 
    181 	{_Operator, "&&", AndAnd, precAndAnd},
    182 
    183 	{_Operator, "==", Eql, precCmp},
    184 	{_Operator, "!=", Neq, precCmp},
    185 	{_Operator, "<", Lss, precCmp},
    186 	{_Operator, "<=", Leq, precCmp},
    187 	{_Operator, ">", Gtr, precCmp},
    188 	{_Operator, ">=", Geq, precCmp},
    189 
    190 	{_Operator, "+", Add, precAdd},
    191 	{_Operator, "-", Sub, precAdd},
    192 	{_Operator, "|", Or, precAdd},
    193 	{_Operator, "^", Xor, precAdd},
    194 
    195 	{_Star, "*", Mul, precMul},
    196 	{_Operator, "/", Div, precMul},
    197 	{_Operator, "%", Rem, precMul},
    198 	{_Operator, "&", And, precMul},
    199 	{_Operator, "&^", AndNot, precMul},
    200 	{_Operator, "<<", Shl, precMul},
    201 	{_Operator, ">>", Shr, precMul},
    202 
    203 	// assignment operations
    204 	{_AssignOp, "+=", Add, precAdd},
    205 	{_AssignOp, "-=", Sub, precAdd},
    206 	{_AssignOp, "|=", Or, precAdd},
    207 	{_AssignOp, "^=", Xor, precAdd},
    208 
    209 	{_AssignOp, "*=", Mul, precMul},
    210 	{_AssignOp, "/=", Div, precMul},
    211 	{_AssignOp, "%=", Rem, precMul},
    212 	{_AssignOp, "&=", And, precMul},
    213 	{_AssignOp, "&^=", AndNot, precMul},
    214 	{_AssignOp, "<<=", Shl, precMul},
    215 	{_AssignOp, ">>=", Shr, precMul},
    216 
    217 	// other operations
    218 	{_IncOp, "++", Add, precAdd},
    219 	{_IncOp, "--", Sub, precAdd},
    220 	{_Assign, "=", 0, 0},
    221 	{_Define, ":=", 0, 0},
    222 	{_Arrow, "<-", 0, 0},
    223 
    224 	// delimiters
    225 	{_Lparen, "(", 0, 0},
    226 	{_Lbrack, "[", 0, 0},
    227 	{_Lbrace, "{", 0, 0},
    228 	{_Rparen, ")", 0, 0},
    229 	{_Rbrack, "]", 0, 0},
    230 	{_Rbrace, "}", 0, 0},
    231 	{_Comma, ",", 0, 0},
    232 	{_Semi, ";", 0, 0},
    233 	{_Colon, ":", 0, 0},
    234 	{_Dot, ".", 0, 0},
    235 	{_DotDotDot, "...", 0, 0},
    236 
    237 	// keywords
    238 	{_Break, "break", 0, 0},
    239 	{_Case, "case", 0, 0},
    240 	{_Chan, "chan", 0, 0},
    241 	{_Const, "const", 0, 0},
    242 	{_Continue, "continue", 0, 0},
    243 	{_Default, "default", 0, 0},
    244 	{_Defer, "defer", 0, 0},
    245 	{_Else, "else", 0, 0},
    246 	{_Fallthrough, "fallthrough", 0, 0},
    247 	{_For, "for", 0, 0},
    248 	{_Func, "func", 0, 0},
    249 	{_Go, "go", 0, 0},
    250 	{_Goto, "goto", 0, 0},
    251 	{_If, "if", 0, 0},
    252 	{_Import, "import", 0, 0},
    253 	{_Interface, "interface", 0, 0},
    254 	{_Map, "map", 0, 0},
    255 	{_Package, "package", 0, 0},
    256 	{_Range, "range", 0, 0},
    257 	{_Return, "return", 0, 0},
    258 	{_Select, "select", 0, 0},
    259 	{_Struct, "struct", 0, 0},
    260 	{_Switch, "switch", 0, 0},
    261 	{_Type, "type", 0, 0},
    262 	{_Var, "var", 0, 0},
    263 }
    264 
    265 func TestScanErrors(t *testing.T) {
    266 	for _, test := range []struct {
    267 		src, msg  string
    268 		line, col uint // 0-based
    269 	}{
    270 		// Note: Positions for lexical errors are the earliest position
    271 		// where the error is apparent, not the beginning of the respective
    272 		// token.
    273 
    274 		// rune-level errors
    275 		{"fo\x00o", "invalid NUL character", 0, 2},
    276 		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
    277 		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 2, 0},
    278 
    279 		// token-level errors
    280 		{"\u00BD" /*  */, "invalid identifier character U+00BD ''", 0, 0},
    281 		{"\U0001d736\U0001d737\U0001d738_" /* _ */, "invalid identifier character U+00BD ''", 0, 13 /* byte offset */},
    282 		{"\U0001d7d8" /*  */, "identifier cannot begin with digit U+1D7D8 ''", 0, 0},
    283 		{"foo\U0001d7d8_" /* foo_ */, "invalid identifier character U+00BD ''", 0, 8 /* byte offset */},
    284 
    285 		{"x + ~y", "bitwise complement operator is ^", 0, 4},
    286 		{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
    287 		{"const x = 0xyz", "malformed hex constant", 0, 12},
    288 		{"0123456789", "malformed octal constant", 0, 10},
    289 		{"0123456789. /* foobar", "comment not terminated", 0, 12},   // valid float constant
    290 		{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
    291 		{"var a, b = 08, 07\n", "malformed octal constant", 0, 13},
    292 		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10},
    293 
    294 		{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
    295 		{"'\n", "newline in character literal", 0, 1},
    296 		{`'\`, "invalid character literal (missing closing ')", 0, 0},
    297 		{`'\'`, "invalid character literal (missing closing ')", 0, 0},
    298 		{`'\x`, "invalid character literal (missing closing ')", 0, 0},
    299 		{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
    300 		{`'\y'`, "unknown escape sequence", 0, 2},
    301 		{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
    302 		{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
    303 		{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
    304 		{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
    305 		{`'\400'`, "octal escape value > 255: 256", 0, 5},
    306 		{`'xx`, "invalid character literal (missing closing ')", 0, 0},
    307 		{`'xx'`, "invalid character literal (more than one character)", 0, 0},
    308 
    309 		{"\"\n", "newline in string", 0, 1},
    310 		{`"`, "string not terminated", 0, 0},
    311 		{`"foo`, "string not terminated", 0, 0},
    312 		{"`", "string not terminated", 0, 0},
    313 		{"`foo", "string not terminated", 0, 0},
    314 		{"/*/", "comment not terminated", 0, 0},
    315 		{"/*\n\nfoo", "comment not terminated", 0, 0},
    316 		{"/*\n\nfoo", "comment not terminated", 0, 0},
    317 		{`"\`, "string not terminated", 0, 0},
    318 		{`"\"`, "string not terminated", 0, 0},
    319 		{`"\x`, "string not terminated", 0, 0},
    320 		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
    321 		{`"\y"`, "unknown escape sequence", 0, 2},
    322 		{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
    323 		{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
    324 		{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
    325 		{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
    326 		{`"\400"`, "octal escape value > 255: 256", 0, 5},
    327 
    328 		{`s := "foo\z"`, "unknown escape sequence", 0, 10},
    329 		{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
    330 		{`"\x`, "string not terminated", 0, 0},
    331 		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
    332 		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
    333 		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
    334 
    335 		// former problem cases
    336 		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
    337 	} {
    338 		var s scanner
    339 		nerrors := 0
    340 		s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
    341 			nerrors++
    342 			// only check the first error
    343 			if nerrors == 1 {
    344 				if msg != test.msg {
    345 					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
    346 				}
    347 				if line != test.line+linebase {
    348 					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
    349 				}
    350 				if col != test.col+colbase {
    351 					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
    352 				}
    353 			} else if nerrors > 1 {
    354 				// TODO(gri) make this use position info
    355 				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
    356 			}
    357 		}, nil)
    358 
    359 		for {
    360 			s.next()
    361 			if s.tok == _EOF {
    362 				break
    363 			}
    364 		}
    365 
    366 		if nerrors == 0 {
    367 			t.Errorf("%q: got no error; want %q", test.src, test.msg)
    368 		}
    369 	}
    370 }
    371 
    372 func TestIssue21938(t *testing.T) {
    373 	s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
    374 
    375 	var got scanner
    376 	got.init(strings.NewReader(s), nil, nil)
    377 	got.next()
    378 
    379 	if got.tok != _Literal || got.lit != ".5" {
    380 		t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
    381 	}
    382 }
    383