Home | History | Annotate | Download | only in parser
      1 // Copyright 2017 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package parser
     16 
     17 import (
     18 	"errors"
     19 	"fmt"
     20 	"io"
     21 	"sort"
     22 	"text/scanner"
     23 )
     24 
     25 var errTooManyErrors = errors.New("too many errors")
     26 
     27 const maxErrors = 100
     28 
     29 type ParseError struct {
     30 	Err error
     31 	Pos scanner.Position
     32 }
     33 
     34 func (e *ParseError) Error() string {
     35 	return fmt.Sprintf("%s: %s", e.Pos, e.Err)
     36 }
     37 
     38 func (p *parser) Parse() ([]Node, []error) {
     39 	defer func() {
     40 		if r := recover(); r != nil {
     41 			if r == errTooManyErrors {
     42 				return
     43 			}
     44 			panic(r)
     45 		}
     46 	}()
     47 
     48 	p.parseLines()
     49 	p.accept(scanner.EOF)
     50 	p.nodes = append(p.nodes, p.comments...)
     51 	sort.Sort(byPosition(p.nodes))
     52 
     53 	return p.nodes, p.errors
     54 }
     55 
     56 type parser struct {
     57 	scanner  scanner.Scanner
     58 	tok      rune
     59 	errors   []error
     60 	comments []Node
     61 	nodes    []Node
     62 	lines    []int
     63 }
     64 
     65 func NewParser(filename string, r io.Reader) *parser {
     66 	p := &parser{}
     67 	p.lines = []int{0}
     68 	p.scanner.Init(r)
     69 	p.scanner.Error = func(sc *scanner.Scanner, msg string) {
     70 		p.errorf(msg)
     71 	}
     72 	p.scanner.Whitespace = 0
     73 	p.scanner.IsIdentRune = func(ch rune, i int) bool {
     74 		return ch > 0 && ch != ':' && ch != '#' && ch != '=' && ch != '+' && ch != '$' &&
     75 			ch != '\\' && ch != '(' && ch != ')' && ch != '{' && ch != '}' && ch != ';' &&
     76 			ch != '|' && ch != '?' && ch != '\r' && !isWhitespace(ch)
     77 	}
     78 	p.scanner.Mode = scanner.ScanIdents
     79 	p.scanner.Filename = filename
     80 	p.next()
     81 	return p
     82 }
     83 
     84 func (p *parser) Unpack(pos Pos) scanner.Position {
     85 	offset := int(pos)
     86 	line := sort.Search(len(p.lines), func(i int) bool { return p.lines[i] > offset }) - 1
     87 	return scanner.Position{
     88 		Filename: p.scanner.Filename,
     89 		Line:     line + 1,
     90 		Column:   offset - p.lines[line] + 1,
     91 		Offset:   offset,
     92 	}
     93 }
     94 
     95 func (p *parser) pos() Pos {
     96 	pos := p.scanner.Position
     97 	if !pos.IsValid() {
     98 		pos = p.scanner.Pos()
     99 	}
    100 	return Pos(pos.Offset)
    101 }
    102 
    103 func (p *parser) errorf(format string, args ...interface{}) {
    104 	err := &ParseError{
    105 		Err: fmt.Errorf(format, args...),
    106 		Pos: p.scanner.Position,
    107 	}
    108 	p.errors = append(p.errors, err)
    109 	if len(p.errors) >= maxErrors {
    110 		panic(errTooManyErrors)
    111 	}
    112 }
    113 
    114 func (p *parser) accept(toks ...rune) bool {
    115 	for _, tok := range toks {
    116 		if p.tok != tok {
    117 			p.errorf("expected %s, found %s", scanner.TokenString(tok),
    118 				scanner.TokenString(p.tok))
    119 			return false
    120 		}
    121 		p.next()
    122 	}
    123 	return true
    124 }
    125 
    126 func (p *parser) next() {
    127 	if p.tok != scanner.EOF {
    128 		p.tok = p.scanner.Scan()
    129 		for p.tok == '\r' {
    130 			p.tok = p.scanner.Scan()
    131 		}
    132 	}
    133 	if p.tok == '\n' {
    134 		p.lines = append(p.lines, p.scanner.Position.Offset+1)
    135 	}
    136 }
    137 
    138 func (p *parser) parseLines() {
    139 	for {
    140 		p.ignoreWhitespace()
    141 
    142 		if p.parseDirective() {
    143 			continue
    144 		}
    145 
    146 		ident := p.parseExpression('=', '?', ':', '#', '\n')
    147 
    148 		p.ignoreSpaces()
    149 
    150 		switch p.tok {
    151 		case '?':
    152 			p.accept('?')
    153 			if p.tok == '=' {
    154 				p.parseAssignment("?=", nil, ident)
    155 			} else {
    156 				p.errorf("expected = after ?")
    157 			}
    158 		case '+':
    159 			p.accept('+')
    160 			if p.tok == '=' {
    161 				p.parseAssignment("+=", nil, ident)
    162 			} else {
    163 				p.errorf("expected = after +")
    164 			}
    165 		case ':':
    166 			p.accept(':')
    167 			switch p.tok {
    168 			case '=':
    169 				p.parseAssignment(":=", nil, ident)
    170 			default:
    171 				p.parseRule(ident)
    172 			}
    173 		case '=':
    174 			p.parseAssignment("=", nil, ident)
    175 		case '#', '\n', scanner.EOF:
    176 			ident.TrimRightSpaces()
    177 			if v, ok := toVariable(ident); ok {
    178 				p.nodes = append(p.nodes, &v)
    179 			} else if !ident.Empty() {
    180 				p.errorf("expected directive, rule, or assignment after ident " + ident.Dump())
    181 			}
    182 			switch p.tok {
    183 			case scanner.EOF:
    184 				return
    185 			case '\n':
    186 				p.accept('\n')
    187 			case '#':
    188 				p.parseComment()
    189 			}
    190 		default:
    191 			p.errorf("expected assignment or rule definition, found %s\n",
    192 				p.scanner.TokenText())
    193 			return
    194 		}
    195 	}
    196 }
    197 
    198 func (p *parser) parseDirective() bool {
    199 	if p.tok != scanner.Ident || !isDirective(p.scanner.TokenText()) {
    200 		return false
    201 	}
    202 
    203 	d := p.scanner.TokenText()
    204 	pos := p.pos()
    205 	p.accept(scanner.Ident)
    206 	endPos := NoPos
    207 
    208 	expression := SimpleMakeString("", pos)
    209 
    210 	switch d {
    211 	case "endif", "endef", "else":
    212 		// Nothing
    213 	case "define":
    214 		expression, endPos = p.parseDefine()
    215 	default:
    216 		p.ignoreSpaces()
    217 		expression = p.parseExpression()
    218 	}
    219 
    220 	p.nodes = append(p.nodes, &Directive{
    221 		NamePos: pos,
    222 		Name:    d,
    223 		Args:    expression,
    224 		EndPos:  endPos,
    225 	})
    226 	return true
    227 }
    228 
    229 func (p *parser) parseDefine() (*MakeString, Pos) {
    230 	value := SimpleMakeString("", p.pos())
    231 
    232 loop:
    233 	for {
    234 		switch p.tok {
    235 		case scanner.Ident:
    236 			value.appendString(p.scanner.TokenText())
    237 			if p.scanner.TokenText() == "endef" {
    238 				p.accept(scanner.Ident)
    239 				break loop
    240 			}
    241 			p.accept(scanner.Ident)
    242 		case '\\':
    243 			p.parseEscape()
    244 			switch p.tok {
    245 			case '\n':
    246 				value.appendString(" ")
    247 			case scanner.EOF:
    248 				p.errorf("expected escaped character, found %s",
    249 					scanner.TokenString(p.tok))
    250 				break loop
    251 			default:
    252 				value.appendString(`\` + string(p.tok))
    253 			}
    254 			p.accept(p.tok)
    255 		//TODO: handle variables inside defines?  result depends if
    256 		//define is used in make or rule context
    257 		//case '$':
    258 		//	variable := p.parseVariable()
    259 		//	value.appendVariable(variable)
    260 		case scanner.EOF:
    261 			p.errorf("unexpected EOF while looking for endef")
    262 			break loop
    263 		default:
    264 			value.appendString(p.scanner.TokenText())
    265 			p.accept(p.tok)
    266 		}
    267 	}
    268 
    269 	return value, p.pos()
    270 }
    271 
    272 func (p *parser) parseEscape() {
    273 	p.scanner.Mode = 0
    274 	p.accept('\\')
    275 	p.scanner.Mode = scanner.ScanIdents
    276 }
    277 
    278 func (p *parser) parseExpression(end ...rune) *MakeString {
    279 	value := SimpleMakeString("", p.pos())
    280 
    281 	endParen := false
    282 	for _, r := range end {
    283 		if r == ')' {
    284 			endParen = true
    285 		}
    286 	}
    287 	parens := 0
    288 
    289 loop:
    290 	for {
    291 		if endParen && parens > 0 && p.tok == ')' {
    292 			parens--
    293 			value.appendString(")")
    294 			p.accept(')')
    295 			continue
    296 		}
    297 
    298 		for _, r := range end {
    299 			if p.tok == r {
    300 				break loop
    301 			}
    302 		}
    303 
    304 		switch p.tok {
    305 		case '\n':
    306 			break loop
    307 		case scanner.Ident:
    308 			value.appendString(p.scanner.TokenText())
    309 			p.accept(scanner.Ident)
    310 		case '\\':
    311 			p.parseEscape()
    312 			switch p.tok {
    313 			case '\n':
    314 				value.appendString(" ")
    315 			case scanner.EOF:
    316 				p.errorf("expected escaped character, found %s",
    317 					scanner.TokenString(p.tok))
    318 				return value
    319 			default:
    320 				value.appendString(`\` + string(p.tok))
    321 			}
    322 			p.accept(p.tok)
    323 		case '#':
    324 			p.parseComment()
    325 			break loop
    326 		case '$':
    327 			var variable Variable
    328 			variable = p.parseVariable()
    329 			value.appendVariable(variable)
    330 		case scanner.EOF:
    331 			break loop
    332 		case '(':
    333 			if endParen {
    334 				parens++
    335 			}
    336 			value.appendString("(")
    337 			p.accept('(')
    338 		default:
    339 			value.appendString(p.scanner.TokenText())
    340 			p.accept(p.tok)
    341 		}
    342 	}
    343 
    344 	if parens > 0 {
    345 		p.errorf("expected closing paren %s", value.Dump())
    346 	}
    347 	return value
    348 }
    349 
    350 func (p *parser) parseVariable() Variable {
    351 	pos := p.pos()
    352 	p.accept('$')
    353 	var name *MakeString
    354 	switch p.tok {
    355 	case '(':
    356 		return p.parseBracketedVariable('(', ')', pos)
    357 	case '{':
    358 		return p.parseBracketedVariable('{', '}', pos)
    359 	case '$':
    360 		name = SimpleMakeString("__builtin_dollar", NoPos)
    361 	case scanner.EOF:
    362 		p.errorf("expected variable name, found %s",
    363 			scanner.TokenString(p.tok))
    364 	default:
    365 		name = p.parseExpression(variableNameEndRunes...)
    366 	}
    367 
    368 	return p.nameToVariable(name)
    369 }
    370 
    371 func (p *parser) parseBracketedVariable(start, end rune, pos Pos) Variable {
    372 	p.accept(start)
    373 	name := p.parseExpression(end)
    374 	p.accept(end)
    375 	return p.nameToVariable(name)
    376 }
    377 
    378 func (p *parser) nameToVariable(name *MakeString) Variable {
    379 	return Variable{
    380 		Name: name,
    381 	}
    382 }
    383 
    384 func (p *parser) parseRule(target *MakeString) {
    385 	prerequisites, newLine := p.parseRulePrerequisites(target)
    386 
    387 	recipe := ""
    388 	recipePos := p.pos()
    389 loop:
    390 	for {
    391 		if newLine {
    392 			if p.tok == '\t' {
    393 				p.accept('\t')
    394 				newLine = false
    395 				continue loop
    396 			} else if p.parseDirective() {
    397 				newLine = false
    398 				continue
    399 			} else {
    400 				break loop
    401 			}
    402 		}
    403 
    404 		newLine = false
    405 		switch p.tok {
    406 		case '\\':
    407 			p.parseEscape()
    408 			recipe += string(p.tok)
    409 			p.accept(p.tok)
    410 		case '\n':
    411 			newLine = true
    412 			recipe += "\n"
    413 			p.accept('\n')
    414 		case scanner.EOF:
    415 			break loop
    416 		default:
    417 			recipe += p.scanner.TokenText()
    418 			p.accept(p.tok)
    419 		}
    420 	}
    421 
    422 	if prerequisites != nil {
    423 		p.nodes = append(p.nodes, &Rule{
    424 			Target:        target,
    425 			Prerequisites: prerequisites,
    426 			Recipe:        recipe,
    427 			RecipePos:     recipePos,
    428 		})
    429 	}
    430 }
    431 
    432 func (p *parser) parseRulePrerequisites(target *MakeString) (*MakeString, bool) {
    433 	newLine := false
    434 
    435 	p.ignoreSpaces()
    436 
    437 	prerequisites := p.parseExpression('#', '\n', ';', ':', '=')
    438 
    439 	switch p.tok {
    440 	case '\n':
    441 		p.accept('\n')
    442 		newLine = true
    443 	case '#':
    444 		p.parseComment()
    445 		newLine = true
    446 	case ';':
    447 		p.accept(';')
    448 	case ':':
    449 		p.accept(':')
    450 		if p.tok == '=' {
    451 			p.parseAssignment(":=", target, prerequisites)
    452 			return nil, true
    453 		} else {
    454 			more := p.parseExpression('#', '\n', ';')
    455 			prerequisites.appendMakeString(more)
    456 		}
    457 	case '=':
    458 		p.parseAssignment("=", target, prerequisites)
    459 		return nil, true
    460 	default:
    461 		p.errorf("unexpected token %s after rule prerequisites", scanner.TokenString(p.tok))
    462 	}
    463 
    464 	return prerequisites, newLine
    465 }
    466 
    467 func (p *parser) parseComment() {
    468 	pos := p.pos()
    469 	p.accept('#')
    470 	comment := ""
    471 loop:
    472 	for {
    473 		switch p.tok {
    474 		case '\\':
    475 			p.parseEscape()
    476 			if p.tok == '\n' {
    477 				comment += "\n"
    478 			} else {
    479 				comment += "\\" + p.scanner.TokenText()
    480 			}
    481 			p.accept(p.tok)
    482 		case '\n':
    483 			p.accept('\n')
    484 			break loop
    485 		case scanner.EOF:
    486 			break loop
    487 		default:
    488 			comment += p.scanner.TokenText()
    489 			p.accept(p.tok)
    490 		}
    491 	}
    492 
    493 	p.comments = append(p.comments, &Comment{
    494 		CommentPos: pos,
    495 		Comment:    comment,
    496 	})
    497 }
    498 
    499 func (p *parser) parseAssignment(t string, target *MakeString, ident *MakeString) {
    500 	// The value of an assignment is everything including and after the first
    501 	// non-whitespace character after the = until the end of the logical line,
    502 	// which may included escaped newlines
    503 	p.accept('=')
    504 	value := p.parseExpression()
    505 	value.TrimLeftSpaces()
    506 	if ident.EndsWith('+') && t == "=" {
    507 		ident.TrimRightOne()
    508 		t = "+="
    509 	}
    510 
    511 	ident.TrimRightSpaces()
    512 
    513 	p.nodes = append(p.nodes, &Assignment{
    514 		Name:   ident,
    515 		Value:  value,
    516 		Target: target,
    517 		Type:   t,
    518 	})
    519 }
    520 
    521 type androidMkModule struct {
    522 	assignments map[string]string
    523 }
    524 
    525 type androidMkFile struct {
    526 	assignments map[string]string
    527 	modules     []androidMkModule
    528 	includes    []string
    529 }
    530 
    531 var directives = [...]string{
    532 	"define",
    533 	"else",
    534 	"endef",
    535 	"endif",
    536 	"ifdef",
    537 	"ifeq",
    538 	"ifndef",
    539 	"ifneq",
    540 	"include",
    541 	"-include",
    542 }
    543 
    544 var functions = [...]string{
    545 	"abspath",
    546 	"addprefix",
    547 	"addsuffix",
    548 	"basename",
    549 	"dir",
    550 	"notdir",
    551 	"subst",
    552 	"suffix",
    553 	"filter",
    554 	"filter-out",
    555 	"findstring",
    556 	"firstword",
    557 	"flavor",
    558 	"join",
    559 	"lastword",
    560 	"patsubst",
    561 	"realpath",
    562 	"shell",
    563 	"sort",
    564 	"strip",
    565 	"wildcard",
    566 	"word",
    567 	"wordlist",
    568 	"words",
    569 	"origin",
    570 	"foreach",
    571 	"call",
    572 	"info",
    573 	"error",
    574 	"warning",
    575 	"if",
    576 	"or",
    577 	"and",
    578 	"value",
    579 	"eval",
    580 	"file",
    581 }
    582 
    583 func init() {
    584 	sort.Strings(directives[:])
    585 	sort.Strings(functions[:])
    586 }
    587 
    588 func isDirective(s string) bool {
    589 	for _, d := range directives {
    590 		if s == d {
    591 			return true
    592 		} else if s < d {
    593 			return false
    594 		}
    595 	}
    596 	return false
    597 }
    598 
    599 func isFunctionName(s string) bool {
    600 	for _, f := range functions {
    601 		if s == f {
    602 			return true
    603 		} else if s < f {
    604 			return false
    605 		}
    606 	}
    607 	return false
    608 }
    609 
    610 func isWhitespace(ch rune) bool {
    611 	return ch == ' ' || ch == '\t' || ch == '\n'
    612 }
    613 
    614 func isValidVariableRune(ch rune) bool {
    615 	return ch != scanner.Ident && ch != ':' && ch != '=' && ch != '#'
    616 }
    617 
    618 var whitespaceRunes = []rune{' ', '\t', '\n'}
    619 var variableNameEndRunes = append([]rune{':', '=', '#', ')', '}'}, whitespaceRunes...)
    620 
    621 func (p *parser) ignoreSpaces() int {
    622 	skipped := 0
    623 	for p.tok == ' ' || p.tok == '\t' {
    624 		p.accept(p.tok)
    625 		skipped++
    626 	}
    627 	return skipped
    628 }
    629 
    630 func (p *parser) ignoreWhitespace() {
    631 	for isWhitespace(p.tok) {
    632 		p.accept(p.tok)
    633 	}
    634 }
    635