Home | History | Annotate | Download | only in lex
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package lex
      6 
      7 import (
      8 	"fmt"
      9 	"os"
     10 	"path/filepath"
     11 	"strconv"
     12 	"strings"
     13 	"text/scanner"
     14 
     15 	"cmd/asm/internal/flags"
     16 	"cmd/internal/objabi"
     17 	"cmd/internal/src"
     18 )
     19 
// Input is the main input: a stack of readers and some macro definitions.
// It also handles #include processing (by pushing onto the input stack)
// and parses and instantiates macro definitions.
type Input struct {
	// Stack supplies the raw tokens; directives and macros are handled on top of it.
	Stack
	includes        []string          // Directories tried by #include: the source file's directory, then the -I directories.
	beginningOfLine bool              // Set when the previous token was a newline or a completed # directive; '#' is only accepted then.
	ifdefStack      []bool            // One entry per open #ifdef/#ifndef; the top entry tells whether input is currently enabled.
	macros          map[string]*Macro // Macro definitions by name, from -D and #define.
	text            string            // Text of last token returned by Next.
	peek            bool              // Whether peekToken and peekText hold a token that Next must return first.
	peekToken       ScanToken         // Token put back by invokeMacro.
	peekText        string            // Text that Next reports for peekToken.
}
     34 
     35 // NewInput returns an Input from the given path.
     36 func NewInput(name string) *Input {
     37 	return &Input{
     38 		// include directories: look in source dir, then -I directories.
     39 		includes:        append([]string{filepath.Dir(name)}, flags.I...),
     40 		beginningOfLine: true,
     41 		macros:          predefine(flags.D),
     42 	}
     43 }
     44 
     45 // predefine installs the macros set by the -D flag on the command line.
     46 func predefine(defines flags.MultiFlag) map[string]*Macro {
     47 	macros := make(map[string]*Macro)
     48 	for _, name := range defines {
     49 		value := "1"
     50 		i := strings.IndexRune(name, '=')
     51 		if i > 0 {
     52 			name, value = name[:i], name[i+1:]
     53 		}
     54 		tokens := Tokenize(name)
     55 		if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
     56 			fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0])
     57 			flags.Usage()
     58 		}
     59 		macros[name] = &Macro{
     60 			name:   name,
     61 			args:   nil,
     62 			tokens: Tokenize(value),
     63 		}
     64 	}
     65 	return macros
     66 }
     67 
// panicOnError makes Input.Error panic with the formatted message instead of
// printing it and exiting the process.
var panicOnError bool // For testing.
     69 
     70 func (in *Input) Error(args ...interface{}) {
     71 	if panicOnError {
     72 		panic(fmt.Errorf("%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...)))
     73 	}
     74 	fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))
     75 	os.Exit(1)
     76 }
     77 
     78 // expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token.
     79 func (in *Input) expectText(args ...interface{}) {
     80 	in.Error(append(args, "; got", strconv.Quote(in.Stack.Text()))...)
     81 }
     82 
     83 // enabled reports whether the input is enabled by an ifdef, or is at the top level.
     84 func (in *Input) enabled() bool {
     85 	return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1]
     86 }
     87 
     88 func (in *Input) expectNewline(directive string) {
     89 	tok := in.Stack.Next()
     90 	if tok != '\n' {
     91 		in.expectText("expected newline after", directive)
     92 	}
     93 }
     94 
// Next returns the next token of the preprocessed input. A token that was put
// back by invokeMacro is returned first. Otherwise tokens are read from the
// Stack: '#' directives are processed and not returned, identifiers naming a
// macro are replaced by the macro's expansion, and other tokens are returned
// only while the input is enabled (see enabled). The text of the returned
// token is available from Text.
func (in *Input) Next() ScanToken {
	if in.peek {
		// Hand back the saved token and its text, then clear the slot.
		in.peek = false
		tok := in.peekToken
		in.text = in.peekText
		return tok
	}
	// If we cannot generate a token after 100 macro invocations, we're in trouble.
	// The usual case is caught by Push, below, but be safe.
	for nesting := 0; nesting < 100; {
		tok := in.Stack.Next()
		switch tok {
		case '#':
			if !in.beginningOfLine {
				in.Error("'#' must be first item on line")
			}
			// hash returns false when it skipped the directive because input
			// is disabled; the rest of that line is then still to be read.
			in.beginningOfLine = in.hash()
		case scanner.Ident:
			// Is it a macro name?
			name := in.Stack.Text()
			macro := in.macros[name]
			if macro != nil {
				// Only macro invocations count towards the nesting limit.
				nesting++
				in.invokeMacro(macro)
				continue
			}
			// Not a macro: treat it like any other token.
			fallthrough
		default:
			if tok == scanner.EOF && len(in.ifdefStack) > 0 {
				// We're skipping text but have run out of input with no #endif.
				in.Error("unclosed #ifdef or #ifndef")
			}
			in.beginningOfLine = tok == '\n'
			if in.enabled() {
				in.text = in.Stack.Text()
				return tok
			}
			// Input is disabled by an #ifdef/#ifndef: drop the token and keep reading.
		}
	}
	in.Error("recursive macro invocation")
	return 0
}
    137 
// Text returns the text of the token most recently returned by Next.
func (in *Input) Text() string {
	return in.text
}
    141 
    142 // hash processes a # preprocessor directive. It returns true iff it completes.
    143 func (in *Input) hash() bool {
    144 	// We have a '#'; it must be followed by a known word (define, include, etc.).
    145 	tok := in.Stack.Next()
    146 	if tok != scanner.Ident {
    147 		in.expectText("expected identifier after '#'")
    148 	}
    149 	if !in.enabled() {
    150 		// Can only start including again if we are at #else or #endif but also
    151 		// need to keep track of nested #if[n]defs.
    152 		// We let #line through because it might affect errors.
    153 		switch in.Stack.Text() {
    154 		case "else", "endif", "ifdef", "ifndef", "line":
    155 			// Press on.
    156 		default:
    157 			return false
    158 		}
    159 	}
    160 	switch in.Stack.Text() {
    161 	case "define":
    162 		in.define()
    163 	case "else":
    164 		in.else_()
    165 	case "endif":
    166 		in.endif()
    167 	case "ifdef":
    168 		in.ifdef(true)
    169 	case "ifndef":
    170 		in.ifdef(false)
    171 	case "include":
    172 		in.include()
    173 	case "line":
    174 		in.line()
    175 	case "undef":
    176 		in.undef()
    177 	default:
    178 		in.Error("unexpected token after '#':", in.Stack.Text())
    179 	}
    180 	return true
    181 }
    182 
    183 // macroName returns the name for the macro being referenced.
    184 func (in *Input) macroName() string {
    185 	// We use the Stack's input method; no macro processing at this stage.
    186 	tok := in.Stack.Next()
    187 	if tok != scanner.Ident {
    188 		in.expectText("expected identifier after # directive")
    189 	}
    190 	// Name is alphanumeric by definition.
    191 	return in.Stack.Text()
    192 }
    193 
    194 // #define processing.
    195 func (in *Input) define() {
    196 	name := in.macroName()
    197 	args, tokens := in.macroDefinition(name)
    198 	in.defineMacro(name, args, tokens)
    199 }
    200 
    201 // defineMacro stores the macro definition in the Input.
    202 func (in *Input) defineMacro(name string, args []string, tokens []Token) {
    203 	if in.macros[name] != nil {
    204 		in.Error("redefinition of macro:", name)
    205 	}
    206 	in.macros[name] = &Macro{
    207 		name:   name,
    208 		args:   args,
    209 		tokens: tokens,
    210 	}
    211 }
    212 
// macroDefinition returns the list of formals and the tokens of the definition.
// The argument list is nil for no parens on the definition; otherwise a list of
// formal argument names.
// The macro's name has already been read; name is used only in error messages.
// Reading stops after the newline that ends the definition; that newline is
// not included in the returned tokens.
func (in *Input) macroDefinition(name string) ([]string, []Token) {
	// Column just after the macro name, used below to detect an adjacent '('.
	prevCol := in.Stack.Col()
	tok := in.Stack.Next()
	if tok == '\n' || tok == scanner.EOF {
		return nil, nil // No definition for macro
	}
	var args []string
	// The C preprocessor treats
	//	#define A(x)
	// and
	//	#define A (x)
	// distinctly: the first is a macro with arguments, the second without.
	// Distinguish these cases using the column number, since we don't
	// see the space itself. Note that text/scanner reports the position at the
	// end of the token. It's where you are now, and you just read this token.
	if tok == '(' && in.Stack.Col() == prevCol+1 {
		// Macro has arguments. Scan list of formals.
		// acceptArg is true when an identifier may come next, false right after one.
		acceptArg := true
		args = []string{} // Zero length but not nil.
	Loop:
		for {
			tok = in.Stack.Next()
			switch tok {
			case ')':
				tok = in.Stack.Next() // First token of macro definition.
				break Loop
			case ',':
				// A comma is only valid directly after a formal.
				if acceptArg {
					in.Error("bad syntax in definition for macro:", name)
				}
				acceptArg = true
			case scanner.Ident:
				// Two formals in a row without a comma between them.
				if !acceptArg {
					in.Error("bad syntax in definition for macro:", name)
				}
				arg := in.Stack.Text()
				if i := lookup(args, arg); i >= 0 {
					in.Error("duplicate argument", arg, "in definition for macro:", name)
				}
				args = append(args, arg)
				acceptArg = false
			default:
				// Anything else, including newline and EOF, is invalid here.
				in.Error("bad definition for macro:", name)
			}
		}
	}
	var tokens []Token
	// Scan to newline. Backslashes escape newlines.
	for tok != '\n' {
		if tok == scanner.EOF {
			in.Error("missing newline in definition for macro:", name)
		}
		if tok == '\\' {
			// Drop the backslash itself and keep the escaped token (a newline
			// or a second backslash) as part of the body.
			tok = in.Stack.Next()
			if tok != '\n' && tok != '\\' {
				in.Error(`can only escape \ or \n in definition for macro:`, name)
			}
		}
		tokens = append(tokens, Make(tok, in.Stack.Text()))
		tok = in.Stack.Next()
	}
	return args, tokens
}
    279 
    280 func lookup(args []string, arg string) int {
    281 	for i, a := range args {
    282 		if a == arg {
    283 			return i
    284 		}
    285 	}
    286 	return -1
    287 }
    288 
    289 // invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual
    290 // parameters substituted for the formals.
    291 // Invoking a macro does not touch the PC/line history.
    292 func (in *Input) invokeMacro(macro *Macro) {
    293 	// If the macro has no arguments, just substitute the text.
    294 	if macro.args == nil {
    295 		in.Push(NewSlice(in.Base(), in.Line(), macro.tokens))
    296 		return
    297 	}
    298 	tok := in.Stack.Next()
    299 	if tok != '(' {
    300 		// If the macro has arguments but is invoked without them, all we push is the macro name.
    301 		// First, put back the token.
    302 		in.peekToken = tok
    303 		in.peekText = in.text
    304 		in.peek = true
    305 		in.Push(NewSlice(in.Base(), in.Line(), []Token{Make(macroName, macro.name)}))
    306 		return
    307 	}
    308 	actuals := in.argsFor(macro)
    309 	var tokens []Token
    310 	for _, tok := range macro.tokens {
    311 		if tok.ScanToken != scanner.Ident {
    312 			tokens = append(tokens, tok)
    313 			continue
    314 		}
    315 		substitution := actuals[tok.text]
    316 		if substitution == nil {
    317 			tokens = append(tokens, tok)
    318 			continue
    319 		}
    320 		tokens = append(tokens, substitution...)
    321 	}
    322 	in.Push(NewSlice(in.Base(), in.Line(), tokens))
    323 }
    324 
    325 // argsFor returns a map from formal name to actual value for this argumented macro invocation.
    326 // The opening parenthesis has been absorbed.
    327 func (in *Input) argsFor(macro *Macro) map[string][]Token {
    328 	var args [][]Token
    329 	// One macro argument per iteration. Collect them all and check counts afterwards.
    330 	for argNum := 0; ; argNum++ {
    331 		tokens, tok := in.collectArgument(macro)
    332 		args = append(args, tokens)
    333 		if tok == ')' {
    334 			break
    335 		}
    336 	}
    337 	// Zero-argument macros are tricky.
    338 	if len(macro.args) == 0 && len(args) == 1 && args[0] == nil {
    339 		args = nil
    340 	} else if len(args) != len(macro.args) {
    341 		in.Error("wrong arg count for macro", macro.name)
    342 	}
    343 	argMap := make(map[string][]Token)
    344 	for i, arg := range args {
    345 		argMap[macro.args[i]] = arg
    346 	}
    347 	return argMap
    348 }
    349 
    350 // collectArgument returns the actual tokens for a single argument of a macro.
    351 // It also returns the token that terminated the argument, which will always
    352 // be either ',' or ')'. The starting '(' has been scanned.
    353 func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) {
    354 	nesting := 0
    355 	var tokens []Token
    356 	for {
    357 		tok := in.Stack.Next()
    358 		if tok == scanner.EOF || tok == '\n' {
    359 			in.Error("unterminated arg list invoking macro:", macro.name)
    360 		}
    361 		if nesting == 0 && (tok == ')' || tok == ',') {
    362 			return tokens, tok
    363 		}
    364 		if tok == '(' {
    365 			nesting++
    366 		}
    367 		if tok == ')' {
    368 			nesting--
    369 		}
    370 		tokens = append(tokens, Make(tok, in.Stack.Text()))
    371 	}
    372 }
    373 
    374 // #ifdef and #ifndef processing.
    375 func (in *Input) ifdef(truth bool) {
    376 	name := in.macroName()
    377 	in.expectNewline("#if[n]def")
    378 	if !in.enabled() {
    379 		truth = false
    380 	} else if _, defined := in.macros[name]; !defined {
    381 		truth = !truth
    382 	}
    383 	in.ifdefStack = append(in.ifdefStack, truth)
    384 }
    385 
    386 // #else processing
    387 func (in *Input) else_() {
    388 	in.expectNewline("#else")
    389 	if len(in.ifdefStack) == 0 {
    390 		in.Error("unmatched #else")
    391 	}
    392 	if len(in.ifdefStack) == 1 || in.ifdefStack[len(in.ifdefStack)-2] {
    393 		in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1]
    394 	}
    395 }
    396 
    397 // #endif processing.
    398 func (in *Input) endif() {
    399 	in.expectNewline("#endif")
    400 	if len(in.ifdefStack) == 0 {
    401 		in.Error("unmatched #endif")
    402 	}
    403 	in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1]
    404 }
    405 
    406 // #include processing.
    407 func (in *Input) include() {
    408 	// Find and parse string.
    409 	tok := in.Stack.Next()
    410 	if tok != scanner.String {
    411 		in.expectText("expected string after #include")
    412 	}
    413 	name, err := strconv.Unquote(in.Stack.Text())
    414 	if err != nil {
    415 		in.Error("unquoting include file name: ", err)
    416 	}
    417 	in.expectNewline("#include")
    418 	// Push tokenizer for file onto stack.
    419 	fd, err := os.Open(name)
    420 	if err != nil {
    421 		for _, dir := range in.includes {
    422 			fd, err = os.Open(filepath.Join(dir, name))
    423 			if err == nil {
    424 				break
    425 			}
    426 		}
    427 		if err != nil {
    428 			in.Error("#include:", err)
    429 		}
    430 	}
    431 	in.Push(NewTokenizer(name, fd, fd))
    432 }
    433 
    434 // #line processing.
    435 func (in *Input) line() {
    436 	// Only need to handle Plan 9 format: #line 337 "filename"
    437 	tok := in.Stack.Next()
    438 	if tok != scanner.Int {
    439 		in.expectText("expected line number after #line")
    440 	}
    441 	line, err := strconv.Atoi(in.Stack.Text())
    442 	if err != nil {
    443 		in.Error("error parsing #line (cannot happen):", err)
    444 	}
    445 	tok = in.Stack.Next()
    446 	if tok != scanner.String {
    447 		in.expectText("expected file name in #line")
    448 	}
    449 	file, err := strconv.Unquote(in.Stack.Text())
    450 	if err != nil {
    451 		in.Error("unquoting #line file name: ", err)
    452 	}
    453 	tok = in.Stack.Next()
    454 	if tok != '\n' {
    455 		in.Error("unexpected token at end of #line: ", tok)
    456 	}
    457 	pos := src.MakePos(in.Base(), uint(in.Line()), uint(in.Col()))
    458 	in.Stack.SetBase(src.NewLinePragmaBase(pos, file, objabi.AbsFile(objabi.WorkingDir(), file, *flags.TrimPath), uint(line)))
    459 }
    460 
    461 // #undef processing
    462 func (in *Input) undef() {
    463 	name := in.macroName()
    464 	if in.macros[name] == nil {
    465 		in.Error("#undef for undefined macro:", name)
    466 	}
    467 	// Newline must be next.
    468 	tok := in.Stack.Next()
    469 	if tok != '\n' {
    470 		in.Error("syntax error in #undef for macro:", name)
    471 	}
    472 	delete(in.macros, name)
    473 }
    474 
    475 func (in *Input) Push(r TokenReader) {
    476 	if len(in.tr) > 100 {
    477 		in.Error("input recursion")
    478 	}
    479 	in.Stack.Push(r)
    480 }
    481 
// Close does nothing; its body is empty.
// NOTE(review): presumably present so Input satisfies a reader interface — confirm.
func (in *Input) Close() {
}
    484