Home | History | Annotate | Download | only in lex
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package lex
      6 
      7 import (
      8 	"fmt"
      9 	"os"
     10 	"path/filepath"
     11 	"strconv"
     12 	"strings"
     13 	"text/scanner"
     14 
     15 	"cmd/asm/internal/flags"
     16 )
     17 
// Input is the main input: a stack of readers and some macro definitions.
// It also handles #include processing (by pushing onto the input stack)
// and parses and instantiates macro definitions.
type Input struct {
	Stack
	// includes lists the directories tried by #include: the directory of
	// the source file first, then the -I directories.
	includes []string
	// beginningOfLine records whether the next token starts a line;
	// Next rejects a '#' seen when this is false.
	beginningOfLine bool
	// ifdefStack has one entry per open #ifdef/#ifndef. The last entry
	// says whether text is currently enabled (see enabled).
	ifdefStack []bool
	// macros maps a macro name to its definition.
	macros map[string]*Macro
	text   string // Text of last token returned by Next.
	// peek, peekToken and peekText implement a one-token pushback:
	// when peek is set, Next returns peekToken with text peekText.
	peek      bool
	peekToken ScanToken
	peekText  string
}
     32 
     33 // NewInput returns an Input from the given path.
     34 func NewInput(name string) *Input {
     35 	return &Input{
     36 		// include directories: look in source dir, then -I directories.
     37 		includes:        append([]string{filepath.Dir(name)}, flags.I...),
     38 		beginningOfLine: true,
     39 		macros:          predefine(flags.D),
     40 	}
     41 }
     42 
     43 // predefine installs the macros set by the -D flag on the command line.
     44 func predefine(defines flags.MultiFlag) map[string]*Macro {
     45 	macros := make(map[string]*Macro)
     46 	for _, name := range defines {
     47 		value := "1"
     48 		i := strings.IndexRune(name, '=')
     49 		if i > 0 {
     50 			name, value = name[:i], name[i+1:]
     51 		}
     52 		tokens := Tokenize(name)
     53 		if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
     54 			fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0])
     55 			flags.Usage()
     56 		}
     57 		macros[name] = &Macro{
     58 			name:   name,
     59 			args:   nil,
     60 			tokens: Tokenize(value),
     61 		}
     62 	}
     63 	return macros
     64 }
     65 
// panicOnError, when set, makes Input.Error panic with the formatted
// message instead of printing it and exiting the process.
var panicOnError bool // For testing.
     67 
     68 func (in *Input) Error(args ...interface{}) {
     69 	if panicOnError {
     70 		panic(fmt.Errorf("%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...)))
     71 	}
     72 	fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))
     73 	os.Exit(1)
     74 }
     75 
     76 // expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token.
     77 func (in *Input) expectText(args ...interface{}) {
     78 	in.Error(append(args, "; got", strconv.Quote(in.Stack.Text()))...)
     79 }
     80 
     81 // enabled reports whether the input is enabled by an ifdef, or is at the top level.
     82 func (in *Input) enabled() bool {
     83 	return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1]
     84 }
     85 
     86 func (in *Input) expectNewline(directive string) {
     87 	tok := in.Stack.Next()
     88 	if tok != '\n' {
     89 		in.expectText("expected newline after", directive)
     90 	}
     91 }
     92 
// Next returns the next token of the preprocessed input. A token pushed
// back by invokeMacro is returned first. Otherwise tokens are read from the
// Stack: '#' directives are handled by hash, identifiers that name a macro
// are expanded by invokeMacro, and any other token is returned provided the
// input is enabled (see enabled). Tokens in disabled text are skipped.
// The text of the returned token is available from Text.
func (in *Input) Next() ScanToken {
	if in.peek {
		// Deliver the pushed-back token together with its saved text.
		in.peek = false
		tok := in.peekToken
		in.text = in.peekText
		return tok
	}
	// If we cannot generate a token after 100 macro invocations, we're in trouble.
	// The usual case is caught by Push, below, but be safe.
	for nesting := 0; nesting < 100; {
		tok := in.Stack.Next()
		switch tok {
		case '#':
			if !in.beginningOfLine {
				in.Error("'#' must be first item on line")
			}
			// The result of hash becomes the new beginning-of-line state.
			in.beginningOfLine = in.hash()
		case scanner.Ident:
			// Is it a macro name?
			name := in.Stack.Text()
			macro := in.macros[name]
			if macro != nil {
				// Only macro invocations count toward the nesting limit.
				nesting++
				in.invokeMacro(macro)
				continue
			}
			// Not a macro: handle the identifier like any other token.
			fallthrough
		default:
			if tok == scanner.EOF && len(in.ifdefStack) > 0 {
				// We're skipping text but have run out of input with no #endif.
				in.Error("unclosed #ifdef or #ifndef")
			}
			in.beginningOfLine = tok == '\n'
			if in.enabled() {
				in.text = in.Stack.Text()
				return tok
			}
			// Disabled text: drop the token and keep reading.
		}
	}
	in.Error("recursive macro invocation")
	// Reached only if Error returns.
	return 0
}
    135 
// Text returns the text of the last token returned by Next.
func (in *Input) Text() string {
	return in.text
}
    139 
    140 // hash processes a # preprocessor directive. It returns true iff it completes.
    141 func (in *Input) hash() bool {
    142 	// We have a '#'; it must be followed by a known word (define, include, etc.).
    143 	tok := in.Stack.Next()
    144 	if tok != scanner.Ident {
    145 		in.expectText("expected identifier after '#'")
    146 	}
    147 	if !in.enabled() {
    148 		// Can only start including again if we are at #else or #endif but also
    149 		// need to keep track of nested #if[n]defs.
    150 		// We let #line through because it might affect errors.
    151 		switch in.Stack.Text() {
    152 		case "else", "endif", "ifdef", "ifndef", "line":
    153 			// Press on.
    154 		default:
    155 			return false
    156 		}
    157 	}
    158 	switch in.Stack.Text() {
    159 	case "define":
    160 		in.define()
    161 	case "else":
    162 		in.else_()
    163 	case "endif":
    164 		in.endif()
    165 	case "ifdef":
    166 		in.ifdef(true)
    167 	case "ifndef":
    168 		in.ifdef(false)
    169 	case "include":
    170 		in.include()
    171 	case "line":
    172 		in.line()
    173 	case "undef":
    174 		in.undef()
    175 	default:
    176 		in.Error("unexpected token after '#':", in.Stack.Text())
    177 	}
    178 	return true
    179 }
    180 
    181 // macroName returns the name for the macro being referenced.
    182 func (in *Input) macroName() string {
    183 	// We use the Stack's input method; no macro processing at this stage.
    184 	tok := in.Stack.Next()
    185 	if tok != scanner.Ident {
    186 		in.expectText("expected identifier after # directive")
    187 	}
    188 	// Name is alphanumeric by definition.
    189 	return in.Stack.Text()
    190 }
    191 
    192 // #define processing.
    193 func (in *Input) define() {
    194 	name := in.macroName()
    195 	args, tokens := in.macroDefinition(name)
    196 	in.defineMacro(name, args, tokens)
    197 }
    198 
    199 // defineMacro stores the macro definition in the Input.
    200 func (in *Input) defineMacro(name string, args []string, tokens []Token) {
    201 	if in.macros[name] != nil {
    202 		in.Error("redefinition of macro:", name)
    203 	}
    204 	in.macros[name] = &Macro{
    205 		name:   name,
    206 		args:   args,
    207 		tokens: tokens,
    208 	}
    209 }
    210 
// macroDefinition returns the list of formals and the tokens of the definition.
// The argument list is nil for no parens on the definition; otherwise a list of
// formal argument names.
// The macro name has already been read; name is used only in error messages.
// Both results are nil when the name is followed directly by a newline or EOF.
func (in *Input) macroDefinition(name string) ([]string, []Token) {
	// Column before reading the token that follows the macro name.
	prevCol := in.Stack.Col()
	tok := in.Stack.Next()
	if tok == '\n' || tok == scanner.EOF {
		return nil, nil // No definition for macro
	}
	var args []string
	// The C preprocessor treats
	//	#define A(x)
	// and
	//	#define A (x)
	// distinctly: the first is a macro with arguments, the second without.
	// Distinguish these cases using the column number, since we don't
	// see the space itself. Note that text/scanner reports the position at the
	// end of the token. It's where you are now, and you just read this token.
	if tok == '(' && in.Stack.Col() == prevCol+1 {
		// Macro has arguments. Scan list of formals.
		// acceptArg is true when an identifier may come next, that is at
		// the start of the list and after each comma.
		// NOTE(review): ')' is accepted regardless of acceptArg, so a
		// trailing comma in the list is not diagnosed.
		acceptArg := true
		args = []string{} // Zero length but not nil.
	Loop:
		for {
			tok = in.Stack.Next()
			switch tok {
			case ')':
				tok = in.Stack.Next() // First token of macro definition.
				break Loop
			case ',':
				if acceptArg {
					in.Error("bad syntax in definition for macro:", name)
				}
				acceptArg = true
			case scanner.Ident:
				if !acceptArg {
					in.Error("bad syntax in definition for macro:", name)
				}
				arg := in.Stack.Text()
				if i := lookup(args, arg); i >= 0 {
					in.Error("duplicate argument", arg, "in definition for macro:", name)
				}
				args = append(args, arg)
				acceptArg = false
			default:
				in.Error("bad definition for macro:", name)
			}
		}
	}
	var tokens []Token
	// Scan to newline. Backslashes escape newlines.
	for tok != '\n' {
		if tok == scanner.EOF {
			in.Error("missing newline in definition for macro:", name)
		}
		if tok == '\\' {
			// Drop the backslash itself; the escaped token read here
			// (a newline or a backslash) is what gets appended below.
			tok = in.Stack.Next()
			if tok != '\n' && tok != '\\' {
				in.Error(`can only escape \ or \n in definition for macro:`, name)
			}
		}
		tokens = append(tokens, Make(tok, in.Stack.Text()))
		tok = in.Stack.Next()
	}
	return args, tokens
}
    277 
    278 func lookup(args []string, arg string) int {
    279 	for i, a := range args {
    280 		if a == arg {
    281 			return i
    282 		}
    283 	}
    284 	return -1
    285 }
    286 
    287 // invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual
    288 // parameters substituted for the formals.
    289 // Invoking a macro does not touch the PC/line history.
    290 func (in *Input) invokeMacro(macro *Macro) {
    291 	// If the macro has no arguments, just substitute the text.
    292 	if macro.args == nil {
    293 		in.Push(NewSlice(in.File(), in.Line(), macro.tokens))
    294 		return
    295 	}
    296 	tok := in.Stack.Next()
    297 	if tok != '(' {
    298 		// If the macro has arguments but is invoked without them, all we push is the macro name.
    299 		// First, put back the token.
    300 		in.peekToken = tok
    301 		in.peekText = in.text
    302 		in.peek = true
    303 		in.Push(NewSlice(in.File(), in.Line(), []Token{Make(macroName, macro.name)}))
    304 		return
    305 	}
    306 	actuals := in.argsFor(macro)
    307 	var tokens []Token
    308 	for _, tok := range macro.tokens {
    309 		if tok.ScanToken != scanner.Ident {
    310 			tokens = append(tokens, tok)
    311 			continue
    312 		}
    313 		substitution := actuals[tok.text]
    314 		if substitution == nil {
    315 			tokens = append(tokens, tok)
    316 			continue
    317 		}
    318 		tokens = append(tokens, substitution...)
    319 	}
    320 	in.Push(NewSlice(in.File(), in.Line(), tokens))
    321 }
    322 
    323 // argsFor returns a map from formal name to actual value for this argumented macro invocation.
    324 // The opening parenthesis has been absorbed.
    325 func (in *Input) argsFor(macro *Macro) map[string][]Token {
    326 	var args [][]Token
    327 	// One macro argument per iteration. Collect them all and check counts afterwards.
    328 	for argNum := 0; ; argNum++ {
    329 		tokens, tok := in.collectArgument(macro)
    330 		args = append(args, tokens)
    331 		if tok == ')' {
    332 			break
    333 		}
    334 	}
    335 	// Zero-argument macros are tricky.
    336 	if len(macro.args) == 0 && len(args) == 1 && args[0] == nil {
    337 		args = nil
    338 	} else if len(args) != len(macro.args) {
    339 		in.Error("wrong arg count for macro", macro.name)
    340 	}
    341 	argMap := make(map[string][]Token)
    342 	for i, arg := range args {
    343 		argMap[macro.args[i]] = arg
    344 	}
    345 	return argMap
    346 }
    347 
    348 // collectArgument returns the actual tokens for a single argument of a macro.
    349 // It also returns the token that terminated the argument, which will always
    350 // be either ',' or ')'. The starting '(' has been scanned.
    351 func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) {
    352 	nesting := 0
    353 	var tokens []Token
    354 	for {
    355 		tok := in.Stack.Next()
    356 		if tok == scanner.EOF || tok == '\n' {
    357 			in.Error("unterminated arg list invoking macro:", macro.name)
    358 		}
    359 		if nesting == 0 && (tok == ')' || tok == ',') {
    360 			return tokens, tok
    361 		}
    362 		if tok == '(' {
    363 			nesting++
    364 		}
    365 		if tok == ')' {
    366 			nesting--
    367 		}
    368 		tokens = append(tokens, Make(tok, in.Stack.Text()))
    369 	}
    370 }
    371 
    372 // #ifdef and #ifndef processing.
    373 func (in *Input) ifdef(truth bool) {
    374 	name := in.macroName()
    375 	in.expectNewline("#if[n]def")
    376 	if !in.enabled() {
    377 		truth = false
    378 	} else if _, defined := in.macros[name]; !defined {
    379 		truth = !truth
    380 	}
    381 	in.ifdefStack = append(in.ifdefStack, truth)
    382 }
    383 
    384 // #else processing
    385 func (in *Input) else_() {
    386 	in.expectNewline("#else")
    387 	if len(in.ifdefStack) == 0 {
    388 		in.Error("unmatched #else")
    389 	}
    390 	if len(in.ifdefStack) == 1 || in.ifdefStack[len(in.ifdefStack)-2] {
    391 		in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1]
    392 	}
    393 }
    394 
    395 // #endif processing.
    396 func (in *Input) endif() {
    397 	in.expectNewline("#endif")
    398 	if len(in.ifdefStack) == 0 {
    399 		in.Error("unmatched #endif")
    400 	}
    401 	in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1]
    402 }
    403 
    404 // #include processing.
    405 func (in *Input) include() {
    406 	// Find and parse string.
    407 	tok := in.Stack.Next()
    408 	if tok != scanner.String {
    409 		in.expectText("expected string after #include")
    410 	}
    411 	name, err := strconv.Unquote(in.Stack.Text())
    412 	if err != nil {
    413 		in.Error("unquoting include file name: ", err)
    414 	}
    415 	in.expectNewline("#include")
    416 	// Push tokenizer for file onto stack.
    417 	fd, err := os.Open(name)
    418 	if err != nil {
    419 		for _, dir := range in.includes {
    420 			fd, err = os.Open(filepath.Join(dir, name))
    421 			if err == nil {
    422 				break
    423 			}
    424 		}
    425 		if err != nil {
    426 			in.Error("#include:", err)
    427 		}
    428 	}
    429 	in.Push(NewTokenizer(name, fd, fd))
    430 }
    431 
    432 // #line processing.
    433 func (in *Input) line() {
    434 	// Only need to handle Plan 9 format: #line 337 "filename"
    435 	tok := in.Stack.Next()
    436 	if tok != scanner.Int {
    437 		in.expectText("expected line number after #line")
    438 	}
    439 	line, err := strconv.Atoi(in.Stack.Text())
    440 	if err != nil {
    441 		in.Error("error parsing #line (cannot happen):", err)
    442 	}
    443 	tok = in.Stack.Next()
    444 	if tok != scanner.String {
    445 		in.expectText("expected file name in #line")
    446 	}
    447 	file, err := strconv.Unquote(in.Stack.Text())
    448 	if err != nil {
    449 		in.Error("unquoting #line file name: ", err)
    450 	}
    451 	tok = in.Stack.Next()
    452 	if tok != '\n' {
    453 		in.Error("unexpected token at end of #line: ", tok)
    454 	}
    455 	linkCtxt.LineHist.Update(histLine, file, line)
    456 	in.Stack.SetPos(line, file)
    457 }
    458 
    459 // #undef processing
    460 func (in *Input) undef() {
    461 	name := in.macroName()
    462 	if in.macros[name] == nil {
    463 		in.Error("#undef for undefined macro:", name)
    464 	}
    465 	// Newline must be next.
    466 	tok := in.Stack.Next()
    467 	if tok != '\n' {
    468 		in.Error("syntax error in #undef for macro:", name)
    469 	}
    470 	delete(in.macros, name)
    471 }
    472 
    473 func (in *Input) Push(r TokenReader) {
    474 	if len(in.tr) > 100 {
    475 		in.Error("input recursion")
    476 	}
    477 	in.Stack.Push(r)
    478 }
    479 
// Close does nothing; its body is empty.
func (in *Input) Close() {
}
    482