Home | History | Annotate | Download | only in lex
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package lex
      6 
      7 import (
      8 	"fmt"
      9 	"os"
     10 	"path/filepath"
     11 	"strconv"
     12 	"strings"
     13 	"text/scanner"
     14 
     15 	"cmd/asm/internal/flags"
     16 )
     17 
     18 // Input is the main input: a stack of readers and some macro definitions.
     19 // It also handles #include processing (by pushing onto the input stack)
     20 // and parses and instantiates macro definitions.
     21 type Input struct {
     22 	Stack
     23 	includes        []string
     24 	beginningOfLine bool
     25 	ifdefStack      []bool
     26 	macros          map[string]*Macro
     27 	text            string // Text of last token returned by Next.
     28 	peek            bool
     29 	peekToken       ScanToken
     30 	peekText        string
     31 }
     32 
     33 // NewInput returns a
     34 func NewInput(name string) *Input {
     35 	return &Input{
     36 		// include directories: look in source dir, then -I directories.
     37 		includes:        append([]string{filepath.Dir(name)}, flags.I...),
     38 		beginningOfLine: true,
     39 		macros:          predefine(flags.D),
     40 	}
     41 }
     42 
     43 // predefine installs the macros set by the -D flag on the command line.
     44 func predefine(defines flags.MultiFlag) map[string]*Macro {
     45 	macros := make(map[string]*Macro)
     46 	for _, name := range defines {
     47 		value := "1"
     48 		i := strings.IndexRune(name, '=')
     49 		if i > 0 {
     50 			name, value = name[:i], name[i+1:]
     51 		}
     52 		tokens := Tokenize(name)
     53 		if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
     54 			fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0])
     55 			flags.Usage()
     56 		}
     57 		macros[name] = &Macro{
     58 			name:   name,
     59 			args:   nil,
     60 			tokens: Tokenize(value),
     61 		}
     62 	}
     63 	return macros
     64 }
     65 
     66 func (in *Input) Error(args ...interface{}) {
     67 	fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))
     68 	os.Exit(1)
     69 }
     70 
     71 // expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token.
     72 func (in *Input) expectText(args ...interface{}) {
     73 	in.Error(append(args, "; got", strconv.Quote(in.Stack.Text()))...)
     74 }
     75 
     76 // enabled reports whether the input is enabled by an ifdef, or is at the top level.
     77 func (in *Input) enabled() bool {
     78 	return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1]
     79 }
     80 
     81 func (in *Input) expectNewline(directive string) {
     82 	tok := in.Stack.Next()
     83 	if tok != '\n' {
     84 		in.expectText("expected newline after", directive)
     85 	}
     86 }
     87 
     88 func (in *Input) Next() ScanToken {
     89 	if in.peek {
     90 		in.peek = false
     91 		tok := in.peekToken
     92 		in.text = in.peekText
     93 		return tok
     94 	}
     95 	// If we cannot generate a token after 100 macro invocations, we're in trouble.
     96 	// The usual case is caught by Push, below, but be safe.
     97 	for nesting := 0; nesting < 100; {
     98 		tok := in.Stack.Next()
     99 		switch tok {
    100 		case '#':
    101 			if !in.beginningOfLine {
    102 				in.Error("'#' must be first item on line")
    103 			}
    104 			in.beginningOfLine = in.hash()
    105 		case scanner.Ident:
    106 			// Is it a macro name?
    107 			name := in.Stack.Text()
    108 			macro := in.macros[name]
    109 			if macro != nil {
    110 				nesting++
    111 				in.invokeMacro(macro)
    112 				continue
    113 			}
    114 			fallthrough
    115 		default:
    116 			in.beginningOfLine = tok == '\n'
    117 			if in.enabled() {
    118 				in.text = in.Stack.Text()
    119 				return tok
    120 			}
    121 		}
    122 	}
    123 	in.Error("recursive macro invocation")
    124 	return 0
    125 }
    126 
    127 func (in *Input) Text() string {
    128 	return in.text
    129 }
    130 
    131 // hash processes a # preprocessor directive. It returns true iff it completes.
    132 func (in *Input) hash() bool {
    133 	// We have a '#'; it must be followed by a known word (define, include, etc.).
    134 	tok := in.Stack.Next()
    135 	if tok != scanner.Ident {
    136 		in.expectText("expected identifier after '#'")
    137 	}
    138 	if !in.enabled() {
    139 		// Can only start including again if we are at #else or #endif.
    140 		// We let #line through because it might affect errors.
    141 		switch in.Stack.Text() {
    142 		case "else", "endif", "line":
    143 			// Press on.
    144 		default:
    145 			return false
    146 		}
    147 	}
    148 	switch in.Stack.Text() {
    149 	case "define":
    150 		in.define()
    151 	case "else":
    152 		in.else_()
    153 	case "endif":
    154 		in.endif()
    155 	case "ifdef":
    156 		in.ifdef(true)
    157 	case "ifndef":
    158 		in.ifdef(false)
    159 	case "include":
    160 		in.include()
    161 	case "line":
    162 		in.line()
    163 	case "undef":
    164 		in.undef()
    165 	default:
    166 		in.Error("unexpected token after '#':", in.Stack.Text())
    167 	}
    168 	return true
    169 }
    170 
    171 // macroName returns the name for the macro being referenced.
    172 func (in *Input) macroName() string {
    173 	// We use the Stack's input method; no macro processing at this stage.
    174 	tok := in.Stack.Next()
    175 	if tok != scanner.Ident {
    176 		in.expectText("expected identifier after # directive")
    177 	}
    178 	// Name is alphanumeric by definition.
    179 	return in.Stack.Text()
    180 }
    181 
    182 // #define processing.
    183 func (in *Input) define() {
    184 	name := in.macroName()
    185 	args, tokens := in.macroDefinition(name)
    186 	in.defineMacro(name, args, tokens)
    187 }
    188 
    189 // defineMacro stores the macro definition in the Input.
    190 func (in *Input) defineMacro(name string, args []string, tokens []Token) {
    191 	if in.macros[name] != nil {
    192 		in.Error("redefinition of macro:", name)
    193 	}
    194 	in.macros[name] = &Macro{
    195 		name:   name,
    196 		args:   args,
    197 		tokens: tokens,
    198 	}
    199 }
    200 
    201 // macroDefinition returns the list of formals and the tokens of the definition.
    202 // The argument list is nil for no parens on the definition; otherwise a list of
    203 // formal argument names.
    204 func (in *Input) macroDefinition(name string) ([]string, []Token) {
    205 	prevCol := in.Stack.Col()
    206 	tok := in.Stack.Next()
    207 	if tok == '\n' || tok == scanner.EOF {
    208 		return nil, nil // No definition for macro
    209 	}
    210 	var args []string
    211 	// The C preprocessor treats
    212 	//	#define A(x)
    213 	// and
    214 	//	#define A (x)
    215 	// distinctly: the first is a macro with arguments, the second without.
    216 	// Distinguish these cases using the column number, since we don't
    217 	// see the space itself. Note that text/scanner reports the position at the
    218 	// end of the token. It's where you are now, and you just read this token.
    219 	if tok == '(' && in.Stack.Col() == prevCol+1 {
    220 		// Macro has arguments. Scan list of formals.
    221 		acceptArg := true
    222 		args = []string{} // Zero length but not nil.
    223 	Loop:
    224 		for {
    225 			tok = in.Stack.Next()
    226 			switch tok {
    227 			case ')':
    228 				tok = in.Stack.Next() // First token of macro definition.
    229 				break Loop
    230 			case ',':
    231 				if acceptArg {
    232 					in.Error("bad syntax in definition for macro:", name)
    233 				}
    234 				acceptArg = true
    235 			case scanner.Ident:
    236 				if !acceptArg {
    237 					in.Error("bad syntax in definition for macro:", name)
    238 				}
    239 				arg := in.Stack.Text()
    240 				if i := lookup(args, arg); i >= 0 {
    241 					in.Error("duplicate argument", arg, "in definition for macro:", name)
    242 				}
    243 				args = append(args, arg)
    244 				acceptArg = false
    245 			default:
    246 				in.Error("bad definition for macro:", name)
    247 			}
    248 		}
    249 	}
    250 	var tokens []Token
    251 	// Scan to newline. Backslashes escape newlines.
    252 	for tok != '\n' {
    253 		if tok == '\\' {
    254 			tok = in.Stack.Next()
    255 			if tok != '\n' && tok != '\\' {
    256 				in.Error(`can only escape \ or \n in definition for macro:`, name)
    257 			}
    258 		}
    259 		tokens = append(tokens, Make(tok, in.Stack.Text()))
    260 		tok = in.Stack.Next()
    261 	}
    262 	return args, tokens
    263 }
    264 
    265 func lookup(args []string, arg string) int {
    266 	for i, a := range args {
    267 		if a == arg {
    268 			return i
    269 		}
    270 	}
    271 	return -1
    272 }
    273 
    274 // invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual
    275 // parameters substituted for the formals.
    276 // Invoking a macro does not touch the PC/line history.
    277 func (in *Input) invokeMacro(macro *Macro) {
    278 	// If the macro has no arguments, just substitute the text.
    279 	if macro.args == nil {
    280 		in.Push(NewSlice(in.File(), in.Line(), macro.tokens))
    281 		return
    282 	}
    283 	tok := in.Stack.Next()
    284 	if tok != '(' {
    285 		// If the macro has arguments but is invoked without them, all we push is the macro name.
    286 		// First, put back the token.
    287 		in.peekToken = tok
    288 		in.peekText = in.text
    289 		in.peek = true
    290 		in.Push(NewSlice(in.File(), in.Line(), []Token{Make(macroName, macro.name)}))
    291 		return
    292 	}
    293 	actuals := in.argsFor(macro)
    294 	var tokens []Token
    295 	for _, tok := range macro.tokens {
    296 		if tok.ScanToken != scanner.Ident {
    297 			tokens = append(tokens, tok)
    298 			continue
    299 		}
    300 		substitution := actuals[tok.text]
    301 		if substitution == nil {
    302 			tokens = append(tokens, tok)
    303 			continue
    304 		}
    305 		tokens = append(tokens, substitution...)
    306 	}
    307 	in.Push(NewSlice(in.File(), in.Line(), tokens))
    308 }
    309 
    310 // argsFor returns a map from formal name to actual value for this argumented macro invocation.
    311 // The opening parenthesis has been absorbed.
    312 func (in *Input) argsFor(macro *Macro) map[string][]Token {
    313 	var args [][]Token
    314 	// One macro argument per iteration. Collect them all and check counts afterwards.
    315 	for argNum := 0; ; argNum++ {
    316 		tokens, tok := in.collectArgument(macro)
    317 		args = append(args, tokens)
    318 		if tok == ')' {
    319 			break
    320 		}
    321 	}
    322 	// Zero-argument macros are tricky.
    323 	if len(macro.args) == 0 && len(args) == 1 && args[0] == nil {
    324 		args = nil
    325 	} else if len(args) != len(macro.args) {
    326 		in.Error("wrong arg count for macro", macro.name)
    327 	}
    328 	argMap := make(map[string][]Token)
    329 	for i, arg := range args {
    330 		argMap[macro.args[i]] = arg
    331 	}
    332 	return argMap
    333 }
    334 
    335 // collectArgument returns the actual tokens for a single argument of a macro.
    336 // It also returns the token that terminated the argument, which will always
    337 // be either ',' or ')'. The starting '(' has been scanned.
    338 func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) {
    339 	nesting := 0
    340 	var tokens []Token
    341 	for {
    342 		tok := in.Stack.Next()
    343 		if tok == scanner.EOF || tok == '\n' {
    344 			in.Error("unterminated arg list invoking macro:", macro.name)
    345 		}
    346 		if nesting == 0 && (tok == ')' || tok == ',') {
    347 			return tokens, tok
    348 		}
    349 		if tok == '(' {
    350 			nesting++
    351 		}
    352 		if tok == ')' {
    353 			nesting--
    354 		}
    355 		tokens = append(tokens, Make(tok, in.Stack.Text()))
    356 	}
    357 }
    358 
    359 // #ifdef and #ifndef processing.
    360 func (in *Input) ifdef(truth bool) {
    361 	name := in.macroName()
    362 	in.expectNewline("#if[n]def")
    363 	if _, defined := in.macros[name]; !defined {
    364 		truth = !truth
    365 	}
    366 	in.ifdefStack = append(in.ifdefStack, truth)
    367 }
    368 
    369 // #else processing
    370 func (in *Input) else_() {
    371 	in.expectNewline("#else")
    372 	if len(in.ifdefStack) == 0 {
    373 		in.Error("unmatched #else")
    374 	}
    375 	in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1]
    376 }
    377 
    378 // #endif processing.
    379 func (in *Input) endif() {
    380 	in.expectNewline("#endif")
    381 	if len(in.ifdefStack) == 0 {
    382 		in.Error("unmatched #endif")
    383 	}
    384 	in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1]
    385 }
    386 
    387 // #include processing.
    388 func (in *Input) include() {
    389 	// Find and parse string.
    390 	tok := in.Stack.Next()
    391 	if tok != scanner.String {
    392 		in.expectText("expected string after #include")
    393 	}
    394 	name, err := strconv.Unquote(in.Stack.Text())
    395 	if err != nil {
    396 		in.Error("unquoting include file name: ", err)
    397 	}
    398 	in.expectNewline("#include")
    399 	// Push tokenizer for file onto stack.
    400 	fd, err := os.Open(name)
    401 	if err != nil {
    402 		for _, dir := range in.includes {
    403 			fd, err = os.Open(filepath.Join(dir, name))
    404 			if err == nil {
    405 				break
    406 			}
    407 		}
    408 		if err != nil {
    409 			in.Error("#include:", err)
    410 		}
    411 	}
    412 	in.Push(NewTokenizer(name, fd, fd))
    413 }
    414 
    415 // #line processing.
    416 func (in *Input) line() {
    417 	// Only need to handle Plan 9 format: #line 337 "filename"
    418 	tok := in.Stack.Next()
    419 	if tok != scanner.Int {
    420 		in.expectText("expected line number after #line")
    421 	}
    422 	line, err := strconv.Atoi(in.Stack.Text())
    423 	if err != nil {
    424 		in.Error("error parsing #line (cannot happen):", err)
    425 	}
    426 	tok = in.Stack.Next()
    427 	if tok != scanner.String {
    428 		in.expectText("expected file name in #line")
    429 	}
    430 	file, err := strconv.Unquote(in.Stack.Text())
    431 	if err != nil {
    432 		in.Error("unquoting #line file name: ", err)
    433 	}
    434 	tok = in.Stack.Next()
    435 	if tok != '\n' {
    436 		in.Error("unexpected token at end of #line: ", tok)
    437 	}
    438 	linkCtxt.LineHist.Update(histLine, file, line)
    439 	in.Stack.SetPos(line, file)
    440 }
    441 
    442 // #undef processing
    443 func (in *Input) undef() {
    444 	name := in.macroName()
    445 	if in.macros[name] == nil {
    446 		in.Error("#undef for undefined macro:", name)
    447 	}
    448 	// Newline must be next.
    449 	tok := in.Stack.Next()
    450 	if tok != '\n' {
    451 		in.Error("syntax error in #undef for macro:", name)
    452 	}
    453 	delete(in.macros, name)
    454 }
    455 
    456 func (in *Input) Push(r TokenReader) {
    457 	if len(in.tr) > 100 {
    458 		in.Error("input recursion")
    459 	}
    460 	in.Stack.Push(r)
    461 }
    462 
    463 func (in *Input) Close() {
    464 }
    465