Home | History | Annotate | Download | only in parser
      1 // Copyright 2017 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package parser
     16 
     17 import (
     18 	"errors"
     19 	"fmt"
     20 	"io"
     21 	"sort"
     22 	"text/scanner"
     23 )
     24 
// errTooManyErrors is the sentinel value errorf panics with once maxErrors
// errors have been recorded; Parse recovers it to stop parsing early.
var errTooManyErrors = errors.New("too many errors")
     26 
// maxErrors is the number of recorded errors at which errorf aborts the parse.
const maxErrors = 100
     28 
     29 type ParseError struct {
     30 	Err error
     31 	Pos scanner.Position
     32 }
     33 
     34 func (e *ParseError) Error() string {
     35 	return fmt.Sprintf("%s: %s", e.Pos, e.Err)
     36 }
     37 
// builtinDollar is the placeholder variable name used for an escaped dollar
// sign ("$$") in the input.
const builtinDollar = "__builtin_dollar"

// builtinDollarName is the single shared MakeString for builtinDollar.
// parseVariable hands out this exact pointer for "$$", and parseExpression
// recognizes it by pointer comparison to emit a literal "$".
var builtinDollarName = SimpleMakeString(builtinDollar, NoPos)
     41 
     42 func (p *parser) Parse() ([]Node, []error) {
     43 	defer func() {
     44 		if r := recover(); r != nil {
     45 			if r == errTooManyErrors {
     46 				return
     47 			}
     48 			panic(r)
     49 		}
     50 	}()
     51 
     52 	p.parseLines()
     53 	p.accept(scanner.EOF)
     54 	p.nodes = append(p.nodes, p.comments...)
     55 	sort.Sort(byPosition(p.nodes))
     56 
     57 	return p.nodes, p.errors
     58 }
     59 
// parser holds the state of a single parse over one input.
type parser struct {
	// scanner tokenizes the input; it is configured in NewParser.
	scanner scanner.Scanner
	// tok is the current token, updated by next.
	tok rune
	// errors accumulates every error reported through errorf.
	errors []error
	// comments collects the Comment nodes produced by parseComment; Parse
	// merges them into nodes.
	comments []Node
	// nodes collects the directives, assignments, rules and variables parsed
	// so far.
	nodes []Node
	// lines holds the byte offset at which each line starts. It begins as
	// {0} and next appends an entry for every '\n' token; Unpack uses it to
	// turn an offset into a line and column.
	lines []int
}
     68 
     69 func NewParser(filename string, r io.Reader) *parser {
     70 	p := &parser{}
     71 	p.lines = []int{0}
     72 	p.scanner.Init(r)
     73 	p.scanner.Error = func(sc *scanner.Scanner, msg string) {
     74 		p.errorf(msg)
     75 	}
     76 	p.scanner.Whitespace = 0
     77 	p.scanner.IsIdentRune = func(ch rune, i int) bool {
     78 		return ch > 0 && ch != ':' && ch != '#' && ch != '=' && ch != '+' && ch != '$' &&
     79 			ch != '\\' && ch != '(' && ch != ')' && ch != '{' && ch != '}' && ch != ';' &&
     80 			ch != '|' && ch != '?' && ch != '\r' && !isWhitespace(ch)
     81 	}
     82 	p.scanner.Mode = scanner.ScanIdents
     83 	p.scanner.Filename = filename
     84 	p.next()
     85 	return p
     86 }
     87 
     88 func (p *parser) Unpack(pos Pos) scanner.Position {
     89 	offset := int(pos)
     90 	line := sort.Search(len(p.lines), func(i int) bool { return p.lines[i] > offset }) - 1
     91 	return scanner.Position{
     92 		Filename: p.scanner.Filename,
     93 		Line:     line + 1,
     94 		Column:   offset - p.lines[line] + 1,
     95 		Offset:   offset,
     96 	}
     97 }
     98 
     99 func (p *parser) pos() Pos {
    100 	pos := p.scanner.Position
    101 	if !pos.IsValid() {
    102 		pos = p.scanner.Pos()
    103 	}
    104 	return Pos(pos.Offset)
    105 }
    106 
    107 func (p *parser) errorf(format string, args ...interface{}) {
    108 	err := &ParseError{
    109 		Err: fmt.Errorf(format, args...),
    110 		Pos: p.scanner.Position,
    111 	}
    112 	p.errors = append(p.errors, err)
    113 	if len(p.errors) >= maxErrors {
    114 		panic(errTooManyErrors)
    115 	}
    116 }
    117 
    118 func (p *parser) accept(toks ...rune) bool {
    119 	for _, tok := range toks {
    120 		if p.tok != tok {
    121 			p.errorf("expected %s, found %s", scanner.TokenString(tok),
    122 				scanner.TokenString(p.tok))
    123 			return false
    124 		}
    125 		p.next()
    126 	}
    127 	return true
    128 }
    129 
    130 func (p *parser) next() {
    131 	if p.tok != scanner.EOF {
    132 		p.tok = p.scanner.Scan()
    133 		for p.tok == '\r' {
    134 			p.tok = p.scanner.Scan()
    135 		}
    136 	}
    137 	if p.tok == '\n' {
    138 		p.lines = append(p.lines, p.scanner.Position.Offset+1)
    139 	}
    140 }
    141 
    142 func (p *parser) parseLines() {
    143 	for {
    144 		p.ignoreWhitespace()
    145 
    146 		if p.parseDirective() {
    147 			continue
    148 		}
    149 
    150 		ident := p.parseExpression('=', '?', ':', '#', '\n')
    151 
    152 		p.ignoreSpaces()
    153 
    154 		switch p.tok {
    155 		case '?':
    156 			p.accept('?')
    157 			if p.tok == '=' {
    158 				p.parseAssignment("?=", nil, ident)
    159 			} else {
    160 				p.errorf("expected = after ?")
    161 			}
    162 		case '+':
    163 			p.accept('+')
    164 			if p.tok == '=' {
    165 				p.parseAssignment("+=", nil, ident)
    166 			} else {
    167 				p.errorf("expected = after +")
    168 			}
    169 		case ':':
    170 			p.accept(':')
    171 			switch p.tok {
    172 			case '=':
    173 				p.parseAssignment(":=", nil, ident)
    174 			default:
    175 				p.parseRule(ident)
    176 			}
    177 		case '=':
    178 			p.parseAssignment("=", nil, ident)
    179 		case '#', '\n', scanner.EOF:
    180 			ident.TrimRightSpaces()
    181 			if v, ok := toVariable(ident); ok {
    182 				p.nodes = append(p.nodes, &v)
    183 			} else if !ident.Empty() {
    184 				p.errorf("expected directive, rule, or assignment after ident " + ident.Dump())
    185 			}
    186 			switch p.tok {
    187 			case scanner.EOF:
    188 				return
    189 			case '\n':
    190 				p.accept('\n')
    191 			case '#':
    192 				p.parseComment()
    193 			}
    194 		default:
    195 			p.errorf("expected assignment or rule definition, found %s\n",
    196 				p.scanner.TokenText())
    197 			return
    198 		}
    199 	}
    200 }
    201 
    202 func (p *parser) parseDirective() bool {
    203 	if p.tok != scanner.Ident || !isDirective(p.scanner.TokenText()) {
    204 		return false
    205 	}
    206 
    207 	d := p.scanner.TokenText()
    208 	pos := p.pos()
    209 	p.accept(scanner.Ident)
    210 	endPos := NoPos
    211 
    212 	expression := SimpleMakeString("", pos)
    213 
    214 	switch d {
    215 	case "endif", "endef", "else":
    216 		// Nothing
    217 	case "define":
    218 		expression, endPos = p.parseDefine()
    219 	default:
    220 		p.ignoreSpaces()
    221 		expression = p.parseExpression()
    222 	}
    223 
    224 	p.nodes = append(p.nodes, &Directive{
    225 		NamePos: pos,
    226 		Name:    d,
    227 		Args:    expression,
    228 		EndPos:  endPos,
    229 	})
    230 	return true
    231 }
    232 
    233 func (p *parser) parseDefine() (*MakeString, Pos) {
    234 	value := SimpleMakeString("", p.pos())
    235 
    236 loop:
    237 	for {
    238 		switch p.tok {
    239 		case scanner.Ident:
    240 			value.appendString(p.scanner.TokenText())
    241 			if p.scanner.TokenText() == "endef" {
    242 				p.accept(scanner.Ident)
    243 				break loop
    244 			}
    245 			p.accept(scanner.Ident)
    246 		case '\\':
    247 			p.parseEscape()
    248 			switch p.tok {
    249 			case '\n':
    250 				value.appendString(" ")
    251 			case scanner.EOF:
    252 				p.errorf("expected escaped character, found %s",
    253 					scanner.TokenString(p.tok))
    254 				break loop
    255 			default:
    256 				value.appendString(`\` + string(p.tok))
    257 			}
    258 			p.accept(p.tok)
    259 		//TODO: handle variables inside defines?  result depends if
    260 		//define is used in make or rule context
    261 		//case '$':
    262 		//	variable := p.parseVariable()
    263 		//	value.appendVariable(variable)
    264 		case scanner.EOF:
    265 			p.errorf("unexpected EOF while looking for endef")
    266 			break loop
    267 		default:
    268 			value.appendString(p.scanner.TokenText())
    269 			p.accept(p.tok)
    270 		}
    271 	}
    272 
    273 	return value, p.pos()
    274 }
    275 
// parseEscape consumes a backslash and leaves the character that follows it
// as the current token.
func (p *parser) parseEscape() {
	// Identifier scanning is switched off while accept scans the token after
	// the backslash, so that token is a single character rather than a whole
	// identifier.
	p.scanner.Mode = 0
	p.accept('\\')
	// Restore the mode configured in NewParser.
	p.scanner.Mode = scanner.ScanIdents
}
    281 
    282 func (p *parser) parseExpression(end ...rune) *MakeString {
    283 	value := SimpleMakeString("", p.pos())
    284 
    285 	endParen := false
    286 	for _, r := range end {
    287 		if r == ')' {
    288 			endParen = true
    289 		}
    290 	}
    291 	parens := 0
    292 
    293 loop:
    294 	for {
    295 		if endParen && parens > 0 && p.tok == ')' {
    296 			parens--
    297 			value.appendString(")")
    298 			p.accept(')')
    299 			continue
    300 		}
    301 
    302 		for _, r := range end {
    303 			if p.tok == r {
    304 				break loop
    305 			}
    306 		}
    307 
    308 		switch p.tok {
    309 		case '\n':
    310 			break loop
    311 		case scanner.Ident:
    312 			value.appendString(p.scanner.TokenText())
    313 			p.accept(scanner.Ident)
    314 		case '\\':
    315 			p.parseEscape()
    316 			switch p.tok {
    317 			case '\n':
    318 				value.appendString(" ")
    319 			case scanner.EOF:
    320 				p.errorf("expected escaped character, found %s",
    321 					scanner.TokenString(p.tok))
    322 				return value
    323 			default:
    324 				value.appendString(`\` + string(p.tok))
    325 			}
    326 			p.accept(p.tok)
    327 		case '#':
    328 			p.parseComment()
    329 			break loop
    330 		case '$':
    331 			var variable Variable
    332 			variable = p.parseVariable()
    333 			if variable.Name == builtinDollarName {
    334 				value.appendString("$")
    335 			} else {
    336 				value.appendVariable(variable)
    337 			}
    338 		case scanner.EOF:
    339 			break loop
    340 		case '(':
    341 			if endParen {
    342 				parens++
    343 			}
    344 			value.appendString("(")
    345 			p.accept('(')
    346 		default:
    347 			value.appendString(p.scanner.TokenText())
    348 			p.accept(p.tok)
    349 		}
    350 	}
    351 
    352 	if parens > 0 {
    353 		p.errorf("expected closing paren %s", value.Dump())
    354 	}
    355 	return value
    356 }
    357 
    358 func (p *parser) parseVariable() Variable {
    359 	pos := p.pos()
    360 	p.accept('$')
    361 	var name *MakeString
    362 	switch p.tok {
    363 	case '(':
    364 		return p.parseBracketedVariable('(', ')', pos)
    365 	case '{':
    366 		return p.parseBracketedVariable('{', '}', pos)
    367 	case '$':
    368 		name = builtinDollarName
    369 		p.accept(p.tok)
    370 	case scanner.EOF:
    371 		p.errorf("expected variable name, found %s",
    372 			scanner.TokenString(p.tok))
    373 	default:
    374 		name = p.parseExpression(variableNameEndRunes...)
    375 	}
    376 
    377 	return p.nameToVariable(name)
    378 }
    379 
    380 func (p *parser) parseBracketedVariable(start, end rune, pos Pos) Variable {
    381 	p.accept(start)
    382 	name := p.parseExpression(end)
    383 	p.accept(end)
    384 	return p.nameToVariable(name)
    385 }
    386 
    387 func (p *parser) nameToVariable(name *MakeString) Variable {
    388 	return Variable{
    389 		Name: name,
    390 	}
    391 }
    392 
    393 func (p *parser) parseRule(target *MakeString) {
    394 	prerequisites, newLine := p.parseRulePrerequisites(target)
    395 
    396 	recipe := ""
    397 	recipePos := p.pos()
    398 loop:
    399 	for {
    400 		if newLine {
    401 			if p.tok == '\t' {
    402 				p.accept('\t')
    403 				newLine = false
    404 				continue loop
    405 			} else if p.parseDirective() {
    406 				newLine = false
    407 				continue
    408 			} else {
    409 				break loop
    410 			}
    411 		}
    412 
    413 		newLine = false
    414 		switch p.tok {
    415 		case '\\':
    416 			p.parseEscape()
    417 			recipe += string(p.tok)
    418 			p.accept(p.tok)
    419 		case '\n':
    420 			newLine = true
    421 			recipe += "\n"
    422 			p.accept('\n')
    423 		case scanner.EOF:
    424 			break loop
    425 		default:
    426 			recipe += p.scanner.TokenText()
    427 			p.accept(p.tok)
    428 		}
    429 	}
    430 
    431 	if prerequisites != nil {
    432 		p.nodes = append(p.nodes, &Rule{
    433 			Target:        target,
    434 			Prerequisites: prerequisites,
    435 			Recipe:        recipe,
    436 			RecipePos:     recipePos,
    437 		})
    438 	}
    439 }
    440 
    441 func (p *parser) parseRulePrerequisites(target *MakeString) (*MakeString, bool) {
    442 	newLine := false
    443 
    444 	p.ignoreSpaces()
    445 
    446 	prerequisites := p.parseExpression('#', '\n', ';', ':', '=')
    447 
    448 	switch p.tok {
    449 	case '\n':
    450 		p.accept('\n')
    451 		newLine = true
    452 	case '#':
    453 		p.parseComment()
    454 		newLine = true
    455 	case ';':
    456 		p.accept(';')
    457 	case ':':
    458 		p.accept(':')
    459 		if p.tok == '=' {
    460 			p.parseAssignment(":=", target, prerequisites)
    461 			return nil, true
    462 		} else {
    463 			more := p.parseExpression('#', '\n', ';')
    464 			prerequisites.appendMakeString(more)
    465 		}
    466 	case '=':
    467 		p.parseAssignment("=", target, prerequisites)
    468 		return nil, true
    469 	case scanner.EOF:
    470 		// do nothing
    471 	default:
    472 		p.errorf("unexpected token %s after rule prerequisites", scanner.TokenString(p.tok))
    473 	}
    474 
    475 	return prerequisites, newLine
    476 }
    477 
    478 func (p *parser) parseComment() {
    479 	pos := p.pos()
    480 	p.accept('#')
    481 	comment := ""
    482 loop:
    483 	for {
    484 		switch p.tok {
    485 		case '\\':
    486 			p.parseEscape()
    487 			if p.tok == '\n' {
    488 				// Special case: '\' does not "escape" newline in comment (b/127521510)
    489 				comment += "\\"
    490 				p.accept(p.tok)
    491 				break loop
    492 			}
    493 			comment += "\\" + p.scanner.TokenText()
    494 			p.accept(p.tok)
    495 		case '\n':
    496 			p.accept('\n')
    497 			break loop
    498 		case scanner.EOF:
    499 			break loop
    500 		default:
    501 			comment += p.scanner.TokenText()
    502 			p.accept(p.tok)
    503 		}
    504 	}
    505 
    506 	p.comments = append(p.comments, &Comment{
    507 		CommentPos: pos,
    508 		Comment:    comment,
    509 	})
    510 }
    511 
    512 func (p *parser) parseAssignment(t string, target *MakeString, ident *MakeString) {
    513 	// The value of an assignment is everything including and after the first
    514 	// non-whitespace character after the = until the end of the logical line,
    515 	// which may included escaped newlines
    516 	p.accept('=')
    517 	value := p.parseExpression()
    518 	value.TrimLeftSpaces()
    519 	if ident.EndsWith('+') && t == "=" {
    520 		ident.TrimRightOne()
    521 		t = "+="
    522 	}
    523 
    524 	ident.TrimRightSpaces()
    525 
    526 	p.nodes = append(p.nodes, &Assignment{
    527 		Name:   ident,
    528 		Value:  value,
    529 		Target: target,
    530 		Type:   t,
    531 	})
    532 }
    533 
// androidMkModule holds a set of string assignments.
// NOTE(review): not referenced anywhere in the visible part of this file;
// presumably one module's variables - confirm against users elsewhere.
type androidMkModule struct {
	assignments map[string]string
}
    537 
// androidMkFile groups string assignments, a list of androidMkModule values
// and a list of include strings.
// NOTE(review): not referenced anywhere in the visible part of this file;
// confirm intended use against users elsewhere.
type androidMkFile struct {
	assignments map[string]string
	modules     []androidMkModule
	includes    []string
}
    543 
// directives lists the keywords recognized by parseDirective. init sorts the
// array, and isDirective relies on that ordering.
var directives = [...]string{
	"define",
	"else",
	"endef",
	"endif",
	"ifdef",
	"ifeq",
	"ifndef",
	"ifneq",
	"include",
	"-include",
}
    556 
// functions lists the names that isFunctionName reports as function names.
// init sorts the array, and isFunctionName relies on that ordering.
var functions = [...]string{
	"abspath",
	"addprefix",
	"addsuffix",
	"basename",
	"dir",
	"notdir",
	"subst",
	"suffix",
	"filter",
	"filter-out",
	"findstring",
	"firstword",
	"flavor",
	"join",
	"lastword",
	"patsubst",
	"realpath",
	"shell",
	"sort",
	"strip",
	"wildcard",
	"word",
	"wordlist",
	"words",
	"origin",
	"foreach",
	"call",
	"info",
	"error",
	"warning",
	"if",
	"or",
	"and",
	"value",
	"eval",
	"file",
}
    595 
    596 func init() {
    597 	sort.Strings(directives[:])
    598 	sort.Strings(functions[:])
    599 }
    600 
    601 func isDirective(s string) bool {
    602 	for _, d := range directives {
    603 		if s == d {
    604 			return true
    605 		} else if s < d {
    606 			return false
    607 		}
    608 	}
    609 	return false
    610 }
    611 
    612 func isFunctionName(s string) bool {
    613 	for _, f := range functions {
    614 		if s == f {
    615 			return true
    616 		} else if s < f {
    617 			return false
    618 		}
    619 	}
    620 	return false
    621 }
    622 
    623 func isWhitespace(ch rune) bool {
    624 	return ch == ' ' || ch == '\t' || ch == '\n'
    625 }
    626 
    627 func isValidVariableRune(ch rune) bool {
    628 	return ch != scanner.Ident && ch != ':' && ch != '=' && ch != '#'
    629 }
    630 
// whitespaceRunes lists the runes that isWhitespace accepts.
var whitespaceRunes = []rune{' ', '\t', '\n'}

// variableNameEndRunes lists the runes that end an unbracketed variable name
// in parseVariable: ':', '=', '#', ')', '}' and any whitespace rune.
var variableNameEndRunes = append([]rune{':', '=', '#', ')', '}'}, whitespaceRunes...)
    633 
    634 func (p *parser) ignoreSpaces() int {
    635 	skipped := 0
    636 	for p.tok == ' ' || p.tok == '\t' {
    637 		p.accept(p.tok)
    638 		skipped++
    639 	}
    640 	return skipped
    641 }
    642 
    643 func (p *parser) ignoreWhitespace() {
    644 	for isWhitespace(p.tok) {
    645 		p.accept(p.tok)
    646 	}
    647 }
    648