Home | History | Annotate | Download | only in asm
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package asm implements the parser and instruction generator for the assembler.
      6 // TODO: Split apart?
      7 package asm
      8 
      9 import (
     10 	"fmt"
     11 	"io"
     12 	"log"
     13 	"os"
     14 	"strconv"
     15 	"text/scanner"
     16 	"unicode/utf8"
     17 
     18 	"cmd/asm/internal/arch"
     19 	"cmd/asm/internal/flags"
     20 	"cmd/asm/internal/lex"
     21 	"cmd/internal/obj"
     22 	"cmd/internal/sys"
     23 )
     24 
// Parser holds the state for parsing one assembly source and generating
// instructions from it: the token source, error bookkeeping, the operand
// currently being parsed, and label/Prog tracking.
type Parser struct {
	lex           lex.TokenReader      // Token source; also supplies file name and line number for errors.
	lineNum       int                  // Line number in source file.
	histLineNum   int32                // Cumulative line number across source files.
	errorLine     int32                // (Cumulative) line number of last error.
	errorCount    int                  // Number of errors.
	pc            int64                // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
	input         []lex.Token          // Tokens of the operand currently being parsed.
	inputPos      int                  // Index in input of the next token to read.
	pendingLabels []string             // Labels to attach to next instruction.
	labels        map[string]*obj.Prog // Progs by label name. NOTE(review): populated outside this view; confirm.
	toPatch       []Patch              // Pending Patch records. NOTE(review): presumably consumed by patch(); confirm.
	addr          []obj.Addr           // Scratch slice of operand addresses, reset for each instruction.
	arch          *arch.Arch           // Architecture description: register and instruction tables.
	ctxt          *obj.Link            // Link context, used for symbol lookup.
	firstProg     *obj.Prog            // Prog returned by Parse on success.
	lastProg      *obj.Prog            // NOTE(review): presumably the most recently emitted Prog; confirm.
	dataAddr      map[string]int64     // Most recent address for DATA for this symbol.
	isJump        bool                 // Instruction being assembled is a jump.
	errorWriter   io.Writer            // Destination for error messages.
}
     46 
// Patch pairs a Prog with the name of a label.
// NOTE(review): presumably records a forward reference to be resolved by
// Parser.patch once all labels are known; confirm against the code that
// fills Parser.toPatch.
type Patch struct {
	prog  *obj.Prog
	label string
}
     51 
     52 func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
     53 	return &Parser{
     54 		ctxt:        ctxt,
     55 		arch:        ar,
     56 		lex:         lexer,
     57 		labels:      make(map[string]*obj.Prog),
     58 		dataAddr:    make(map[string]int64),
     59 		errorWriter: os.Stderr,
     60 	}
     61 }
     62 
// panicOnError is enabled when testing to abort execution on the first error
// and turn it into a recoverable panic.
var panicOnError bool
     66 
     67 func (p *Parser) errorf(format string, args ...interface{}) {
     68 	if panicOnError {
     69 		panic(fmt.Errorf(format, args...))
     70 	}
     71 	if p.histLineNum == p.errorLine {
     72 		// Only one error per line.
     73 		return
     74 	}
     75 	p.errorLine = p.histLineNum
     76 	if p.lex != nil {
     77 		// Put file and line information on head of message.
     78 		format = "%s:%d: " + format + "\n"
     79 		args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
     80 	}
     81 	fmt.Fprintf(p.errorWriter, format, args...)
     82 	p.errorCount++
     83 	if p.errorCount > 10 && !*flags.AllErrors {
     84 		log.Fatal("too many errors")
     85 	}
     86 }
     87 
     88 func (p *Parser) Parse() (*obj.Prog, bool) {
     89 	for p.line() {
     90 	}
     91 	if p.errorCount > 0 {
     92 		return nil, false
     93 	}
     94 	p.patch()
     95 	return p.firstProg, true
     96 }
     97 
// line parses one statement of the form
//
//	WORD [ arg {, arg} ] (';' | '\n')
//
// It collects the tokens of each comma-separated operand and hands the word
// and operands to pseudo or instruction. A word followed directly by ':' is
// recorded as a pending label instead.
//
// It returns false when parsing should stop: at end of input, when the first
// token of a statement is not an identifier, or on EOF in the middle of a
// statement. It returns true otherwise, including after reporting errors
// such as an unrecognized instruction.
func (p *Parser) line() bool {
	// Skip newlines.
	var tok lex.ScanToken
	for {
		tok = p.lex.Next()
		// We save the line number here so error messages from this instruction
		// are labeled with this line. Otherwise we complain after we've absorbed
		// the terminating newline and the line numbers are off by one in errors.
		p.lineNum = p.lex.Line()
		p.histLineNum = lex.HistLine()
		switch tok {
		case '\n', ';':
			continue
		case scanner.EOF:
			return false
		}
		break
	}
	// First item must be an identifier.
	if tok != scanner.Ident {
		p.errorf("expected identifier, found %q", p.lex.Text())
		return false // Might as well stop now.
	}
	word := p.lex.Text()
	var cond string
	operands := make([][]lex.Token, 0, 3)
	// Zero or more comma-separated operands, one per loop.
	nesting := 0 // Depth of open '(' / '['; separators only count at depth 0.
	colon := -1  // Index of the operand that preceded a ':' separator, or -1.
	for tok != '\n' && tok != ';' {
		// Process one operand.
		items := make([]lex.Token, 0, 3)
		for {
			tok = p.lex.Next()
			// These forms are only recognized immediately after the word,
			// before any operand token has been collected.
			if len(operands) == 0 && len(items) == 0 {
				if p.arch.InFamily(sys.ARM, sys.ARM64) && tok == '.' {
					// ARM conditionals.
					tok = p.lex.Next()
					str := p.lex.Text()
					if tok != scanner.Ident {
						p.errorf("ARM condition expected identifier, found %s", str)
					}
					// Suffixes accumulate, each with its leading dot.
					cond = cond + "." + str
					continue
				}
				if tok == ':' {
					// Labels.
					p.pendingLabels = append(p.pendingLabels, word)
					return true
				}
			}
			if tok == scanner.EOF {
				p.errorf("unexpected EOF")
				return false
			}
			// Split operands on comma. Also, the old syntax on x86 for a "register pair"
			// was AX:DX, for which the new syntax is DX, AX. Note the reordering.
			if tok == '\n' || tok == ';' || (nesting == 0 && (tok == ',' || tok == ':')) {
				if tok == ':' {
					// Remember this location so we can swap the operands below.
					if colon >= 0 {
						p.errorf("invalid ':' in operand")
						return true
					}
					colon = len(operands)
				}
				break
			}
			if tok == '(' || tok == '[' {
				nesting++
			}
			if tok == ')' || tok == ']' {
				nesting--
			}
			items = append(items, lex.Make(tok, p.lex.Text()))
		}
		if len(items) > 0 {
			operands = append(operands, items)
			if colon >= 0 && len(operands) == colon+2 {
				// AX:DX becomes DX, AX.
				operands[colon], operands[colon+1] = operands[colon+1], operands[colon]
				colon = -1
			}
		} else if len(operands) > 0 || tok == ',' || colon >= 0 {
			// Had a separator with nothing after.
			p.errorf("missing operand")
		}
	}
	// Pseudo-operations take priority over the architecture's instructions.
	if p.pseudo(word, operands) {
		return true
	}
	i, present := p.arch.Instructions[word]
	if present {
		p.instruction(i, word, cond, operands)
		return true
	}
	p.errorf("unrecognized instruction %q", word)
	return true
}
    198 
    199 func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) {
    200 	p.addr = p.addr[0:0]
    201 	p.isJump = p.arch.IsJump(word)
    202 	for _, op := range operands {
    203 		addr := p.address(op)
    204 		if !p.isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo.
    205 			p.errorf("illegal use of pseudo-register in %s", word)
    206 		}
    207 		p.addr = append(p.addr, addr)
    208 	}
    209 	if p.isJump {
    210 		p.asmJump(op, cond, p.addr)
    211 		return
    212 	}
    213 	p.asmInstruction(op, cond, p.addr)
    214 }
    215 
    216 func (p *Parser) pseudo(word string, operands [][]lex.Token) bool {
    217 	switch word {
    218 	case "DATA":
    219 		p.asmData(word, operands)
    220 	case "FUNCDATA":
    221 		p.asmFuncData(word, operands)
    222 	case "GLOBL":
    223 		p.asmGlobl(word, operands)
    224 	case "PCDATA":
    225 		p.asmPCData(word, operands)
    226 	case "TEXT":
    227 		p.asmText(word, operands)
    228 	default:
    229 		return false
    230 	}
    231 	return true
    232 }
    233 
    234 func (p *Parser) start(operand []lex.Token) {
    235 	p.input = operand
    236 	p.inputPos = 0
    237 }
    238 
    239 // address parses the operand into a link address structure.
    240 func (p *Parser) address(operand []lex.Token) obj.Addr {
    241 	p.start(operand)
    242 	addr := obj.Addr{}
    243 	p.operand(&addr)
    244 	return addr
    245 }
    246 
    247 // parseScale converts a decimal string into a valid scale factor.
    248 func (p *Parser) parseScale(s string) int8 {
    249 	switch s {
    250 	case "1", "2", "4", "8":
    251 		return int8(s[0] - '0')
    252 	}
    253 	p.errorf("bad scale: %s", s)
    254 	return 0
    255 }
    256 
// operand parses a general operand from p.input and stores the result in *a.
// It returns false only for an empty operand, an unterminated '(' or a string
// constant without the '$' prefix; every other path returns true, even when
// errors were reported along the way via errorf.
func (p *Parser) operand(a *obj.Addr) bool {
	//fmt.Printf("Operand: %v\n", p.input)
	if len(p.input) == 0 {
		p.errorf("empty operand: cannot happen")
		return false
	}
	// General address (with a few exceptions) looks like
	//	$symoffset(SB)(reg)(index*scale)
	// Exceptions are:
	//
	//	R1
	//	offset
	//	$offset
	// Every piece is optional, so we scan left to right and what
	// we discover tells us where we are.

	// Prefix: $ or *.
	var prefix rune
	switch tok := p.peek(); tok {
	case '$', '*':
		prefix = rune(tok)
		p.next()
	}

	// Symbol: symoffset(SB)
	tok := p.next()
	name := tok.String()
	if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) {
		// We have a symbol. Parse $symoffset(symkind)
		p.symbolReference(a, name, prefix)
		// fmt.Printf("SYM %s\n", obj.Dconv(&emptyProg, 0, a))
		if p.peek() == scanner.EOF {
			return true
		}
		// Otherwise tokens remain after the symbol reference and parsing
		// continues with the checks below.
	}

	// Special register list syntax for arm: [R1,R3-R7]
	if tok.ScanToken == '[' {
		if prefix != 0 {
			p.errorf("illegal use of register list")
		}
		p.registerList(a)
		p.expectOperandEnd()
		return true
	}

	// Register: R1
	if tok.ScanToken == scanner.Ident && p.atStartOfRegister(name) {
		if p.atRegisterShift() {
			// ARM shifted register such as R1<<R2 or R1>>2.
			a.Type = obj.TYPE_SHIFT
			a.Offset = p.registerShift(tok.String(), prefix)
			if p.peek() == '(' {
				// Can only be a literal register here.
				p.next()
				tok := p.next()
				name := tok.String()
				if !p.atStartOfRegister(name) {
					p.errorf("expected register; found %s", name)
				}
				a.Reg, _ = p.registerReference(name)
				p.get(')')
			}
		} else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok {
			if scale != 0 {
				p.errorf("expected simple register reference")
			}
			a.Type = obj.TYPE_REG
			a.Reg = r1
			if r2 != 0 {
				// Form is R1:R2. It is on RHS and the second register
				// needs to go into the LHS.
				panic("cannot happen (Addr.Reg2)")
			}
		}
		// fmt.Printf("REG %s\n", obj.Dconv(&emptyProg, 0, a))
		p.expectOperandEnd()
		return true
	}

	// Constant.
	haveConstant := false
	switch tok.ScanToken {
	case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~':
		haveConstant = true
	case '(':
		// Could be parenthesized expression or (R). Must be something, though.
		tok := p.next()
		if tok.ScanToken == scanner.EOF {
			p.errorf("missing right parenthesis")
			return false
		}
		rname := tok.String()
		// Un-read the token after '(' so atStartOfRegister can peek past it.
		p.back()
		haveConstant = !p.atStartOfRegister(rname)
		if !haveConstant {
			p.back() // Put back the '('.
		}
	}
	if haveConstant {
		// Un-read the first token of the constant so the expression
		// parsers see it.
		p.back()
		if p.have(scanner.Float) {
			if prefix != '$' {
				p.errorf("floating-point constant must be an immediate")
			}
			a.Type = obj.TYPE_FCONST
			a.Val = p.floatExpr()
			// fmt.Printf("FCONST %s\n", obj.Dconv(&emptyProg, 0, a))
			p.expectOperandEnd()
			return true
		}
		if p.have(scanner.String) {
			if prefix != '$' {
				p.errorf("string constant must be an immediate")
				return false
			}
			str, err := strconv.Unquote(p.get(scanner.String).String())
			if err != nil {
				p.errorf("string parse error: %s", err)
			}
			a.Type = obj.TYPE_SCONST
			a.Val = str
			// fmt.Printf("SCONST %s\n", obj.Dconv(&emptyProg, 0, a))
			p.expectOperandEnd()
			return true
		}
		a.Offset = int64(p.expr())
		if p.peek() != '(' {
			// A bare constant: the prefix decides how it is interpreted.
			switch prefix {
			case '$':
				a.Type = obj.TYPE_CONST
			case '*':
				a.Type = obj.TYPE_INDIR // Can appear but is illegal, will be rejected by the linker.
			default:
				a.Type = obj.TYPE_MEM
			}
			// fmt.Printf("CONST %d %s\n", a.Offset, obj.Dconv(&emptyProg, 0, a))
			p.expectOperandEnd()
			return true
		}
		// fmt.Printf("offset %d \n", a.Offset)
	}

	// Register indirection: (reg) or (index*scale). We are on the opening paren.
	p.registerIndirect(a, prefix)
	// fmt.Printf("DONE %s\n", p.arch.Dconv(&emptyProg, 0, a))

	p.expectOperandEnd()
	return true
}
    408 
    409 // atStartOfRegister reports whether the parser is at the start of a register definition.
    410 func (p *Parser) atStartOfRegister(name string) bool {
    411 	// Simple register: R10.
    412 	_, present := p.arch.Register[name]
    413 	if present {
    414 		return true
    415 	}
    416 	// Parenthesized register: R(10).
    417 	return p.arch.RegisterPrefix[name] && p.peek() == '('
    418 }
    419 
    420 // atRegisterShift reports whether we are at the start of an ARM shifted register.
    421 // We have consumed the register or R prefix.
    422 func (p *Parser) atRegisterShift() bool {
    423 	// ARM only.
    424 	if p.arch.Family != sys.ARM {
    425 		return false
    426 	}
    427 	// R1<<...
    428 	if lex.IsRegisterShift(p.peek()) {
    429 		return true
    430 	}
    431 	// R(1)<<...   Ugly check. TODO: Rethink how we handle ARM register shifts to be
    432 	// less special.
    433 	if p.peek() != '(' || len(p.input)-p.inputPos < 4 {
    434 		return false
    435 	}
    436 	return p.at('(', scanner.Int, ')') && lex.IsRegisterShift(p.input[p.inputPos+3].ScanToken)
    437 }
    438 
    439 // registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10).
    440 func (p *Parser) registerReference(name string) (int16, bool) {
    441 	r, present := p.arch.Register[name]
    442 	if present {
    443 		return r, true
    444 	}
    445 	if !p.arch.RegisterPrefix[name] {
    446 		p.errorf("expected register; found %s", name)
    447 		return 0, false
    448 	}
    449 	p.get('(')
    450 	tok := p.get(scanner.Int)
    451 	num, err := strconv.ParseInt(tok.String(), 10, 16)
    452 	p.get(')')
    453 	if err != nil {
    454 		p.errorf("parsing register list: %s", err)
    455 		return 0, false
    456 	}
    457 	r, ok := p.arch.RegisterNumber(name, int16(num))
    458 	if !ok {
    459 		p.errorf("illegal register %s(%d)", name, r)
    460 		return 0, false
    461 	}
    462 	return r, true
    463 }
    464 
    465 // register parses a full register reference where there is no symbol present (as in 4(R0) or R(10) but not sym(SB))
    466 // including forms involving multiple registers such as R1:R2.
    467 func (p *Parser) register(name string, prefix rune) (r1, r2 int16, scale int8, ok bool) {
    468 	// R1 or R(1) R1:R2 R1,R2 R1+R2, or R1*scale.
    469 	r1, ok = p.registerReference(name)
    470 	if !ok {
    471 		return
    472 	}
    473 	if prefix != 0 && prefix != '*' { // *AX is OK.
    474 		p.errorf("prefix %c not allowed for register: %c%s", prefix, prefix, name)
    475 	}
    476 	c := p.peek()
    477 	if c == ':' || c == ',' || c == '+' {
    478 		// 2nd register; syntax (R1+R2) etc. No two architectures agree.
    479 		// Check the architectures match the syntax.
    480 		switch p.next().ScanToken {
    481 		case ',':
    482 			if !p.arch.InFamily(sys.ARM, sys.ARM64) {
    483 				p.errorf("(register,register) not supported on this architecture")
    484 				return
    485 			}
    486 		case '+':
    487 			if p.arch.Family != sys.PPC64 {
    488 				p.errorf("(register+register) not supported on this architecture")
    489 				return
    490 			}
    491 		}
    492 		name := p.next().String()
    493 		r2, ok = p.registerReference(name)
    494 		if !ok {
    495 			return
    496 		}
    497 	}
    498 	if p.peek() == '*' {
    499 		// Scale
    500 		p.next()
    501 		scale = p.parseScale(p.next().String())
    502 	}
    503 	return r1, r2, scale, true
    504 }
    505 
    506 // registerShift parses an ARM shifted register reference and returns the encoded representation.
    507 // There is known to be a register (current token) and a shift operator (peeked token).
    508 func (p *Parser) registerShift(name string, prefix rune) int64 {
    509 	if prefix != 0 {
    510 		p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name)
    511 	}
    512 	// R1 op R2 or r1 op constant.
    513 	// op is:
    514 	//	"<<" == 0
    515 	//	">>" == 1
    516 	//	"->" == 2
    517 	//	"@>" == 3
    518 	r1, ok := p.registerReference(name)
    519 	if !ok {
    520 		return 0
    521 	}
    522 	var op int16
    523 	switch p.next().ScanToken {
    524 	case lex.LSH:
    525 		op = 0
    526 	case lex.RSH:
    527 		op = 1
    528 	case lex.ARR:
    529 		op = 2
    530 	case lex.ROT:
    531 		op = 3
    532 	}
    533 	tok := p.next()
    534 	str := tok.String()
    535 	var count int16
    536 	switch tok.ScanToken {
    537 	case scanner.Ident:
    538 		r2, ok := p.registerReference(str)
    539 		if !ok {
    540 			p.errorf("rhs of shift must be register or integer: %s", str)
    541 		}
    542 		count = (r2&15)<<8 | 1<<4
    543 	case scanner.Int, '(':
    544 		p.back()
    545 		x := int64(p.expr())
    546 		if x >= 32 {
    547 			p.errorf("register shift count too large: %s", str)
    548 		}
    549 		count = int16((x & 31) << 7)
    550 	default:
    551 		p.errorf("unexpected %s in register shift", tok.String())
    552 	}
    553 	return int64((r1 & 15) | op<<5 | count)
    554 }
    555 
    556 // symbolReference parses a symbol that is known not to be a register.
    557 func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) {
    558 	// Identifier is a name.
    559 	switch prefix {
    560 	case 0:
    561 		a.Type = obj.TYPE_MEM
    562 	case '$':
    563 		a.Type = obj.TYPE_ADDR
    564 	case '*':
    565 		a.Type = obj.TYPE_INDIR
    566 	}
    567 	// Weirdness with statics: Might now have "<>".
    568 	isStatic := 0 // TODO: Really a boolean, but Linklookup wants a "version" integer.
    569 	if p.peek() == '<' {
    570 		isStatic = 1
    571 		p.next()
    572 		p.get('>')
    573 	}
    574 	if p.peek() == '+' || p.peek() == '-' {
    575 		a.Offset = int64(p.expr())
    576 	}
    577 	a.Sym = obj.Linklookup(p.ctxt, name, isStatic)
    578 	if p.peek() == scanner.EOF {
    579 		if prefix == 0 && p.isJump {
    580 			// Symbols without prefix or suffix are jump labels.
    581 			return
    582 		}
    583 		p.errorf("illegal or missing addressing mode for symbol %s", name)
    584 		return
    585 	}
    586 	// Expect (SB), (FP), (PC), or (SP)
    587 	p.get('(')
    588 	reg := p.get(scanner.Ident).String()
    589 	p.get(')')
    590 	p.setPseudoRegister(a, reg, isStatic != 0, prefix)
    591 }
    592 
    593 // setPseudoRegister sets the NAME field of addr for a pseudo-register reference such as (SB).
    594 func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, prefix rune) {
    595 	if addr.Reg != 0 {
    596 		p.errorf("internal error: reg %s already set in pseudo", reg)
    597 	}
    598 	switch reg {
    599 	case "FP":
    600 		addr.Name = obj.NAME_PARAM
    601 	case "PC":
    602 		if prefix != 0 {
    603 			p.errorf("illegal addressing mode for PC")
    604 		}
    605 		addr.Type = obj.TYPE_BRANCH // We set the type and leave NAME untouched. See asmJump.
    606 	case "SB":
    607 		addr.Name = obj.NAME_EXTERN
    608 		if isStatic {
    609 			addr.Name = obj.NAME_STATIC
    610 		}
    611 	case "SP":
    612 		addr.Name = obj.NAME_AUTO // The pseudo-stack.
    613 	default:
    614 		p.errorf("expected pseudo-register; found %s", reg)
    615 	}
    616 	if prefix == '$' {
    617 		addr.Type = obj.TYPE_ADDR
    618 	}
    619 }
    620 
// registerIndirect parses the general form of a register indirection.
// It can be (R1), (R2*scale), or (R1)(R2*scale) where R1 may be a simple
// register or register pair R:R or (R, R) or (R+R).
// Or it might be a pseudo-indirection like (FP).
// We are sitting on the opening parenthesis.
//
// The result is stored in a; prefix is the operand prefix ('$', '*' or 0).
// Errors are reported through errorf.
func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) {
	p.get('(')
	tok := p.next()
	name := tok.String()
	r1, r2, scale, ok := p.register(name, 0)
	if !ok {
		p.errorf("indirect through non-register %s", tok)
	}
	p.get(')')
	a.Type = obj.TYPE_MEM
	// A negative register number denotes a pseudo-register.
	if r1 < 0 {
		// Pseudo-register reference.
		if r2 != 0 {
			p.errorf("cannot use pseudo-register in pair")
			return
		}
		// For SB, SP, and FP, there must be a name here. 0(FP) is not legal.
		if name != "PC" && a.Name == obj.NAME_NONE {
			p.errorf("cannot reference %s without a symbol", name)
		}
		p.setPseudoRegister(a, name, false, prefix)
		return
	}
	a.Reg = r1
	if r2 != 0 {
		// TODO: Consistency in the encoding would be nice here.
		if p.arch.InFamily(sys.ARM, sys.ARM64) {
			// Special form
			// ARM: destination register pair (R1, R2).
			// ARM64: register pair (R1, R2) for LDP/STP.
			if prefix != 0 || scale != 0 {
				p.errorf("illegal address mode for register pair")
				return
			}
			a.Type = obj.TYPE_REGREG
			a.Offset = int64(r2)
			// Nothing may follow
			return
		}
		if p.arch.Family == sys.PPC64 {
			// Special form for PPC64: (R1+R2); alias for (R1)(R2*1).
			if prefix != 0 || scale != 0 {
				p.errorf("illegal address mode for register+register")
				return
			}
			a.Type = obj.TYPE_MEM
			a.Scale = 1
			a.Index = r2
			// Nothing may follow.
			return
		}
	}
	// Reached only on architectures with no special pair form above.
	if r2 != 0 {
		p.errorf("indirect through register pair")
	}
	if prefix == '$' {
		a.Type = obj.TYPE_ADDR
	}
	if r1 == arch.RPC && prefix != 0 {
		p.errorf("illegal addressing mode for PC")
	}
	if scale == 0 && p.peek() == '(' {
		// General form (R)(R*scale).
		p.next()
		tok := p.next()
		r1, r2, scale, ok = p.register(tok.String(), 0)
		if !ok {
			p.errorf("indirect through non-register %s", tok)
		}
		if r2 != 0 {
			p.errorf("unimplemented two-register form")
		}
		a.Index = r1
		a.Scale = int16(scale)
		p.get(')')
	} else if scale != 0 {
		// First (R) was missing, all we have is (R*scale).
		a.Reg = 0
		a.Index = r1
		a.Scale = int16(scale)
	}
}
    708 
    709 // registerList parses an ARM register list expression, a list of registers in [].
    710 // There may be comma-separated ranges or individual registers, as in
    711 // [R1,R3-R5]. Only R0 through R15 may appear.
    712 // The opening bracket has been consumed.
    713 func (p *Parser) registerList(a *obj.Addr) {
    714 	// One range per loop.
    715 	const maxReg = 16
    716 	var bits uint16
    717 ListLoop:
    718 	for {
    719 		tok := p.next()
    720 		switch tok.ScanToken {
    721 		case ']':
    722 			break ListLoop
    723 		case scanner.EOF:
    724 			p.errorf("missing ']' in register list")
    725 			return
    726 		}
    727 		// Parse the upper and lower bounds.
    728 		lo := p.registerNumber(tok.String())
    729 		hi := lo
    730 		if p.peek() == '-' {
    731 			p.next()
    732 			hi = p.registerNumber(p.next().String())
    733 		}
    734 		if hi < lo {
    735 			lo, hi = hi, lo
    736 		}
    737 		// Check there are no duplicates in the register list.
    738 		for i := 0; lo <= hi && i < maxReg; i++ {
    739 			if bits&(1<<lo) != 0 {
    740 				p.errorf("register R%d already in list", lo)
    741 			}
    742 			bits |= 1 << lo
    743 			lo++
    744 		}
    745 		if p.peek() != ']' {
    746 			p.get(',')
    747 		}
    748 	}
    749 	a.Type = obj.TYPE_REGLIST
    750 	a.Offset = int64(bits)
    751 }
    752 
    753 // register number is ARM-specific. It returns the number of the specified register.
    754 func (p *Parser) registerNumber(name string) uint16 {
    755 	if p.arch.Family == sys.ARM && name == "g" {
    756 		return 10
    757 	}
    758 	if name[0] != 'R' {
    759 		p.errorf("expected g or R0 through R15; found %s", name)
    760 		return 0
    761 	}
    762 	r, ok := p.registerReference(name)
    763 	if !ok {
    764 		return 0
    765 	}
    766 	reg := r - p.arch.Register["R0"]
    767 	if reg < 0 {
    768 		// Could happen for an architecture having other registers prefixed by R
    769 		p.errorf("expected g or R0 through R15; found %s", name)
    770 		return 0
    771 	}
    772 	return uint16(reg)
    773 }
    774 
    775 // Note: There are two changes in the expression handling here
    776 // compared to the old yacc/C implementations. Neither has
    777 // much practical consequence because the expressions we
    778 // see in assembly code are simple, but for the record:
    779 //
    780 // 1) Evaluation uses uint64; the old one used int64.
    781 // 2) Precedence uses Go rules not C rules.
    782 
    783 // expr = term | term ('+' | '-' | '|' | '^') term.
    784 func (p *Parser) expr() uint64 {
    785 	value := p.term()
    786 	for {
    787 		switch p.peek() {
    788 		case '+':
    789 			p.next()
    790 			value += p.term()
    791 		case '-':
    792 			p.next()
    793 			value -= p.term()
    794 		case '|':
    795 			p.next()
    796 			value |= p.term()
    797 		case '^':
    798 			p.next()
    799 			value ^= p.term()
    800 		default:
    801 			return value
    802 		}
    803 	}
    804 }
    805 
    806 // floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')'
    807 func (p *Parser) floatExpr() float64 {
    808 	tok := p.next()
    809 	switch tok.ScanToken {
    810 	case '(':
    811 		v := p.floatExpr()
    812 		if p.next().ScanToken != ')' {
    813 			p.errorf("missing closing paren")
    814 		}
    815 		return v
    816 	case '+':
    817 		return +p.floatExpr()
    818 	case '-':
    819 		return -p.floatExpr()
    820 	case scanner.Float:
    821 		return p.atof(tok.String())
    822 	}
    823 	p.errorf("unexpected %s evaluating float expression", tok)
    824 	return 0
    825 }
    826 
    827 // term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor
    828 func (p *Parser) term() uint64 {
    829 	value := p.factor()
    830 	for {
    831 		switch p.peek() {
    832 		case '*':
    833 			p.next()
    834 			value *= p.factor()
    835 		case '/':
    836 			p.next()
    837 			if int64(value) < 0 {
    838 				p.errorf("divide of value with high bit set")
    839 			}
    840 			divisor := p.factor()
    841 			if divisor == 0 {
    842 				p.errorf("division by zero")
    843 			} else {
    844 				value /= divisor
    845 			}
    846 		case '%':
    847 			p.next()
    848 			divisor := p.factor()
    849 			if int64(value) < 0 {
    850 				p.errorf("modulo of value with high bit set")
    851 			}
    852 			if divisor == 0 {
    853 				p.errorf("modulo by zero")
    854 			} else {
    855 				value %= divisor
    856 			}
    857 		case lex.LSH:
    858 			p.next()
    859 			shift := p.factor()
    860 			if int64(shift) < 0 {
    861 				p.errorf("negative left shift count")
    862 			}
    863 			return value << shift
    864 		case lex.RSH:
    865 			p.next()
    866 			shift := p.term()
    867 			if int64(shift) < 0 {
    868 				p.errorf("negative right shift count")
    869 			}
    870 			if int64(value) < 0 {
    871 				p.errorf("right shift of value with high bit set")
    872 			}
    873 			value >>= shift
    874 		case '&':
    875 			p.next()
    876 			value &= p.factor()
    877 		default:
    878 			return value
    879 		}
    880 	}
    881 }
    882 
    883 // factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')'
    884 func (p *Parser) factor() uint64 {
    885 	tok := p.next()
    886 	switch tok.ScanToken {
    887 	case scanner.Int:
    888 		return p.atoi(tok.String())
    889 	case scanner.Char:
    890 		str, err := strconv.Unquote(tok.String())
    891 		if err != nil {
    892 			p.errorf("%s", err)
    893 		}
    894 		r, w := utf8.DecodeRuneInString(str)
    895 		if w == 1 && r == utf8.RuneError {
    896 			p.errorf("illegal UTF-8 encoding for character constant")
    897 		}
    898 		return uint64(r)
    899 	case '+':
    900 		return +p.factor()
    901 	case '-':
    902 		return -p.factor()
    903 	case '~':
    904 		return ^p.factor()
    905 	case '(':
    906 		v := p.expr()
    907 		if p.next().ScanToken != ')' {
    908 			p.errorf("missing closing paren")
    909 		}
    910 		return v
    911 	}
    912 	p.errorf("unexpected %s evaluating expression", tok)
    913 	return 0
    914 }
    915 
    916 // positiveAtoi returns an int64 that must be >= 0.
    917 func (p *Parser) positiveAtoi(str string) int64 {
    918 	value, err := strconv.ParseInt(str, 0, 64)
    919 	if err != nil {
    920 		p.errorf("%s", err)
    921 	}
    922 	if value < 0 {
    923 		p.errorf("%s overflows int64", str)
    924 	}
    925 	return value
    926 }
    927 
    928 func (p *Parser) atoi(str string) uint64 {
    929 	value, err := strconv.ParseUint(str, 0, 64)
    930 	if err != nil {
    931 		p.errorf("%s", err)
    932 	}
    933 	return value
    934 }
    935 
    936 func (p *Parser) atof(str string) float64 {
    937 	value, err := strconv.ParseFloat(str, 64)
    938 	if err != nil {
    939 		p.errorf("%s", err)
    940 	}
    941 	return value
    942 }
    943 
// EOF represents the end of input. It is the token returned by Parser.next
// once the current operand's tokens are exhausted.
var EOF = lex.Make(scanner.EOF, "EOF")
    946 
    947 func (p *Parser) next() lex.Token {
    948 	if !p.more() {
    949 		return EOF
    950 	}
    951 	tok := p.input[p.inputPos]
    952 	p.inputPos++
    953 	return tok
    954 }
    955 
    956 func (p *Parser) back() {
    957 	if p.inputPos == 0 {
    958 		p.errorf("internal error: backing up before BOL")
    959 	} else {
    960 		p.inputPos--
    961 	}
    962 }
    963 
    964 func (p *Parser) peek() lex.ScanToken {
    965 	if p.more() {
    966 		return p.input[p.inputPos].ScanToken
    967 	}
    968 	return scanner.EOF
    969 }
    970 
    971 func (p *Parser) more() bool {
    972 	return p.inputPos < len(p.input)
    973 }
    974 
    975 // get verifies that the next item has the expected type and returns it.
    976 func (p *Parser) get(expected lex.ScanToken) lex.Token {
    977 	p.expect(expected, expected.String())
    978 	return p.next()
    979 }
    980 
    981 // expectOperandEnd verifies that the parsing state is properly at the end of an operand.
    982 func (p *Parser) expectOperandEnd() {
    983 	p.expect(scanner.EOF, "end of operand")
    984 }
    985 
    986 // expect verifies that the next item has the expected type. It does not consume it.
    987 func (p *Parser) expect(expectedToken lex.ScanToken, expectedMessage string) {
    988 	if p.peek() != expectedToken {
    989 		p.errorf("expected %s, found %s", expectedMessage, p.next())
    990 	}
    991 }
    992 
    993 // have reports whether the remaining tokens (including the current one) contain the specified token.
    994 func (p *Parser) have(token lex.ScanToken) bool {
    995 	for i := p.inputPos; i < len(p.input); i++ {
    996 		if p.input[i].ScanToken == token {
    997 			return true
    998 		}
    999 	}
   1000 	return false
   1001 }
   1002 
   1003 // at reports whether the next tokens are as requested.
   1004 func (p *Parser) at(next ...lex.ScanToken) bool {
   1005 	if len(p.input)-p.inputPos < len(next) {
   1006 		return false
   1007 	}
   1008 	for i, r := range next {
   1009 		if p.input[p.inputPos+i].ScanToken != r {
   1010 			return false
   1011 		}
   1012 	}
   1013 	return true
   1014 }
   1015