Home | History | Annotate | Download | only in asm
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package asm implements the parser and instruction generator for the assembler.
      6 // TODO: Split apart?
      7 package asm
      8 
      9 import (
     10 	"fmt"
     11 	"io"
     12 	"log"
     13 	"os"
     14 	"strconv"
     15 	"text/scanner"
     16 	"unicode/utf8"
     17 
     18 	"cmd/asm/internal/arch"
     19 	"cmd/asm/internal/flags"
     20 	"cmd/asm/internal/lex"
     21 	"cmd/internal/obj"
     22 	"cmd/internal/src"
     23 	"cmd/internal/sys"
     24 )
     25 
     26 type Parser struct {
     27 	lex           lex.TokenReader
     28 	lineNum       int   // Line number in source file.
     29 	errorLine     int   // Line number of last error.
     30 	errorCount    int   // Number of errors.
     31 	pc            int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
     32 	input         []lex.Token
     33 	inputPos      int
     34 	pendingLabels []string // Labels to attach to next instruction.
     35 	labels        map[string]*obj.Prog
     36 	toPatch       []Patch
     37 	addr          []obj.Addr
     38 	arch          *arch.Arch
     39 	ctxt          *obj.Link
     40 	firstProg     *obj.Prog
     41 	lastProg      *obj.Prog
     42 	dataAddr      map[string]int64 // Most recent address for DATA for this symbol.
     43 	isJump        bool             // Instruction being assembled is a jump.
     44 	errorWriter   io.Writer
     45 }
     46 
     47 type Patch struct {
     48 	prog  *obj.Prog
     49 	label string
     50 }
     51 
     52 func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
     53 	return &Parser{
     54 		ctxt:        ctxt,
     55 		arch:        ar,
     56 		lex:         lexer,
     57 		labels:      make(map[string]*obj.Prog),
     58 		dataAddr:    make(map[string]int64),
     59 		errorWriter: os.Stderr,
     60 	}
     61 }
     62 
     63 // panicOnError is enabled when testing to abort execution on the first error
     64 // and turn it into a recoverable panic.
     65 var panicOnError bool
     66 
     67 func (p *Parser) errorf(format string, args ...interface{}) {
     68 	if panicOnError {
     69 		panic(fmt.Errorf(format, args...))
     70 	}
     71 	if p.lineNum == p.errorLine {
     72 		// Only one error per line.
     73 		return
     74 	}
     75 	p.errorLine = p.lineNum
     76 	if p.lex != nil {
     77 		// Put file and line information on head of message.
     78 		format = "%s:%d: " + format + "\n"
     79 		args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
     80 	}
     81 	fmt.Fprintf(p.errorWriter, format, args...)
     82 	p.errorCount++
     83 	if p.errorCount > 10 && !*flags.AllErrors {
     84 		log.Fatal("too many errors")
     85 	}
     86 }
     87 
     88 func (p *Parser) pos() src.XPos {
     89 	return p.ctxt.PosTable.XPos(src.MakePos(p.lex.Base(), uint(p.lineNum), 0))
     90 }
     91 
     92 func (p *Parser) Parse() (*obj.Prog, bool) {
     93 	for p.line() {
     94 	}
     95 	if p.errorCount > 0 {
     96 		return nil, false
     97 	}
     98 	p.patch()
     99 	return p.firstProg, true
    100 }
    101 
    102 // WORD [ arg {, arg} ] (';' | '\n')
    103 func (p *Parser) line() bool {
    104 	// Skip newlines.
    105 	var tok lex.ScanToken
    106 	for {
    107 		tok = p.lex.Next()
    108 		// We save the line number here so error messages from this instruction
    109 		// are labeled with this line. Otherwise we complain after we've absorbed
    110 		// the terminating newline and the line numbers are off by one in errors.
    111 		p.lineNum = p.lex.Line()
    112 		switch tok {
    113 		case '\n', ';':
    114 			continue
    115 		case scanner.EOF:
    116 			return false
    117 		}
    118 		break
    119 	}
    120 	// First item must be an identifier.
    121 	if tok != scanner.Ident {
    122 		p.errorf("expected identifier, found %q", p.lex.Text())
    123 		return false // Might as well stop now.
    124 	}
    125 	word := p.lex.Text()
    126 	var cond string
    127 	operands := make([][]lex.Token, 0, 3)
    128 	// Zero or more comma-separated operands, one per loop.
    129 	nesting := 0
    130 	colon := -1
    131 	for tok != '\n' && tok != ';' {
    132 		// Process one operand.
    133 		items := make([]lex.Token, 0, 3)
    134 		for {
    135 			tok = p.lex.Next()
    136 			if len(operands) == 0 && len(items) == 0 {
    137 				if p.arch.InFamily(sys.ARM, sys.ARM64) && tok == '.' {
    138 					// ARM conditionals.
    139 					tok = p.lex.Next()
    140 					str := p.lex.Text()
    141 					if tok != scanner.Ident {
    142 						p.errorf("ARM condition expected identifier, found %s", str)
    143 					}
    144 					cond = cond + "." + str
    145 					continue
    146 				}
    147 				if tok == ':' {
    148 					// Labels.
    149 					p.pendingLabels = append(p.pendingLabels, word)
    150 					return true
    151 				}
    152 			}
    153 			if tok == scanner.EOF {
    154 				p.errorf("unexpected EOF")
    155 				return false
    156 			}
    157 			// Split operands on comma. Also, the old syntax on x86 for a "register pair"
    158 			// was AX:DX, for which the new syntax is DX, AX. Note the reordering.
    159 			if tok == '\n' || tok == ';' || (nesting == 0 && (tok == ',' || tok == ':')) {
    160 				if tok == ':' {
    161 					// Remember this location so we can swap the operands below.
    162 					if colon >= 0 {
    163 						p.errorf("invalid ':' in operand")
    164 						return true
    165 					}
    166 					colon = len(operands)
    167 				}
    168 				break
    169 			}
    170 			if tok == '(' || tok == '[' {
    171 				nesting++
    172 			}
    173 			if tok == ')' || tok == ']' {
    174 				nesting--
    175 			}
    176 			items = append(items, lex.Make(tok, p.lex.Text()))
    177 		}
    178 		if len(items) > 0 {
    179 			operands = append(operands, items)
    180 			if colon >= 0 && len(operands) == colon+2 {
    181 				// AX:DX becomes DX, AX.
    182 				operands[colon], operands[colon+1] = operands[colon+1], operands[colon]
    183 				colon = -1
    184 			}
    185 		} else if len(operands) > 0 || tok == ',' || colon >= 0 {
    186 			// Had a separator with nothing after.
    187 			p.errorf("missing operand")
    188 		}
    189 	}
    190 	if p.pseudo(word, operands) {
    191 		return true
    192 	}
    193 	i, present := p.arch.Instructions[word]
    194 	if present {
    195 		p.instruction(i, word, cond, operands)
    196 		return true
    197 	}
    198 	p.errorf("unrecognized instruction %q", word)
    199 	return true
    200 }
    201 
    202 func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) {
    203 	p.addr = p.addr[0:0]
    204 	p.isJump = p.arch.IsJump(word)
    205 	for _, op := range operands {
    206 		addr := p.address(op)
    207 		if !p.isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo.
    208 			p.errorf("illegal use of pseudo-register in %s", word)
    209 		}
    210 		p.addr = append(p.addr, addr)
    211 	}
    212 	if p.isJump {
    213 		p.asmJump(op, cond, p.addr)
    214 		return
    215 	}
    216 	p.asmInstruction(op, cond, p.addr)
    217 }
    218 
    219 func (p *Parser) pseudo(word string, operands [][]lex.Token) bool {
    220 	switch word {
    221 	case "DATA":
    222 		p.asmData(word, operands)
    223 	case "FUNCDATA":
    224 		p.asmFuncData(word, operands)
    225 	case "GLOBL":
    226 		p.asmGlobl(word, operands)
    227 	case "PCDATA":
    228 		p.asmPCData(word, operands)
    229 	case "TEXT":
    230 		p.asmText(word, operands)
    231 	default:
    232 		return false
    233 	}
    234 	return true
    235 }
    236 
    237 func (p *Parser) start(operand []lex.Token) {
    238 	p.input = operand
    239 	p.inputPos = 0
    240 }
    241 
    242 // address parses the operand into a link address structure.
    243 func (p *Parser) address(operand []lex.Token) obj.Addr {
    244 	p.start(operand)
    245 	addr := obj.Addr{}
    246 	p.operand(&addr)
    247 	return addr
    248 }
    249 
    250 // parseScale converts a decimal string into a valid scale factor.
    251 func (p *Parser) parseScale(s string) int8 {
    252 	switch s {
    253 	case "1", "2", "4", "8":
    254 		return int8(s[0] - '0')
    255 	}
    256 	p.errorf("bad scale: %s", s)
    257 	return 0
    258 }
    259 
    260 // operand parses a general operand and stores the result in *a.
    261 func (p *Parser) operand(a *obj.Addr) {
    262 	//fmt.Printf("Operand: %v\n", p.input)
    263 	if len(p.input) == 0 {
    264 		p.errorf("empty operand: cannot happen")
    265 		return
    266 	}
    267 	// General address (with a few exceptions) looks like
    268 	//	$symoffset(SB)(reg)(index*scale)
    269 	// Exceptions are:
    270 	//
    271 	//	R1
    272 	//	offset
    273 	//	$offset
    274 	// Every piece is optional, so we scan left to right and what
    275 	// we discover tells us where we are.
    276 
    277 	// Prefix: $.
    278 	var prefix rune
    279 	switch tok := p.peek(); tok {
    280 	case '$', '*':
    281 		prefix = rune(tok)
    282 		p.next()
    283 	}
    284 
    285 	// Symbol: symoffset(SB)
    286 	tok := p.next()
    287 	name := tok.String()
    288 	if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) {
    289 		// We have a symbol. Parse $symoffset(symkind)
    290 		p.symbolReference(a, name, prefix)
    291 		// fmt.Printf("SYM %s\n", obj.Dconv(&emptyProg, 0, a))
    292 		if p.peek() == scanner.EOF {
    293 			return
    294 		}
    295 	}
    296 
    297 	// Special register list syntax for arm: [R1,R3-R7]
    298 	if tok.ScanToken == '[' {
    299 		if prefix != 0 {
    300 			p.errorf("illegal use of register list")
    301 		}
    302 		p.registerList(a)
    303 		p.expectOperandEnd()
    304 		return
    305 	}
    306 
    307 	// Register: R1
    308 	if tok.ScanToken == scanner.Ident && p.atStartOfRegister(name) {
    309 		if p.atRegisterShift() {
    310 			// ARM shifted register such as R1<<R2 or R1>>2.
    311 			a.Type = obj.TYPE_SHIFT
    312 			a.Offset = p.registerShift(tok.String(), prefix)
    313 			if p.peek() == '(' {
    314 				// Can only be a literal register here.
    315 				p.next()
    316 				tok := p.next()
    317 				name := tok.String()
    318 				if !p.atStartOfRegister(name) {
    319 					p.errorf("expected register; found %s", name)
    320 				}
    321 				a.Reg, _ = p.registerReference(name)
    322 				p.get(')')
    323 			}
    324 		} else if p.atRegisterExtension() {
    325 			p.registerExtension(a, tok.String(), prefix)
    326 			p.expectOperandEnd()
    327 			return
    328 		} else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok {
    329 			if scale != 0 {
    330 				p.errorf("expected simple register reference")
    331 			}
    332 			a.Type = obj.TYPE_REG
    333 			a.Reg = r1
    334 			if r2 != 0 {
    335 				// Form is R1:R2. It is on RHS and the second register
    336 				// needs to go into the LHS.
    337 				panic("cannot happen (Addr.Reg2)")
    338 			}
    339 		}
    340 		// fmt.Printf("REG %s\n", obj.Dconv(&emptyProg, 0, a))
    341 		p.expectOperandEnd()
    342 		return
    343 	}
    344 
    345 	// Constant.
    346 	haveConstant := false
    347 	switch tok.ScanToken {
    348 	case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~':
    349 		haveConstant = true
    350 	case '(':
    351 		// Could be parenthesized expression or (R). Must be something, though.
    352 		tok := p.next()
    353 		if tok.ScanToken == scanner.EOF {
    354 			p.errorf("missing right parenthesis")
    355 			return
    356 		}
    357 		rname := tok.String()
    358 		p.back()
    359 		haveConstant = !p.atStartOfRegister(rname)
    360 		if !haveConstant {
    361 			p.back() // Put back the '('.
    362 		}
    363 	}
    364 	if haveConstant {
    365 		p.back()
    366 		if p.have(scanner.Float) {
    367 			if prefix != '$' {
    368 				p.errorf("floating-point constant must be an immediate")
    369 			}
    370 			a.Type = obj.TYPE_FCONST
    371 			a.Val = p.floatExpr()
    372 			// fmt.Printf("FCONST %s\n", obj.Dconv(&emptyProg, 0, a))
    373 			p.expectOperandEnd()
    374 			return
    375 		}
    376 		if p.have(scanner.String) {
    377 			if prefix != '$' {
    378 				p.errorf("string constant must be an immediate")
    379 				return
    380 			}
    381 			str, err := strconv.Unquote(p.get(scanner.String).String())
    382 			if err != nil {
    383 				p.errorf("string parse error: %s", err)
    384 			}
    385 			a.Type = obj.TYPE_SCONST
    386 			a.Val = str
    387 			// fmt.Printf("SCONST %s\n", obj.Dconv(&emptyProg, 0, a))
    388 			p.expectOperandEnd()
    389 			return
    390 		}
    391 		a.Offset = int64(p.expr())
    392 		if p.peek() != '(' {
    393 			switch prefix {
    394 			case '$':
    395 				a.Type = obj.TYPE_CONST
    396 			case '*':
    397 				a.Type = obj.TYPE_INDIR // Can appear but is illegal, will be rejected by the linker.
    398 			default:
    399 				a.Type = obj.TYPE_MEM
    400 			}
    401 			// fmt.Printf("CONST %d %s\n", a.Offset, obj.Dconv(&emptyProg, 0, a))
    402 			p.expectOperandEnd()
    403 			return
    404 		}
    405 		// fmt.Printf("offset %d \n", a.Offset)
    406 	}
    407 
    408 	// Register indirection: (reg) or (index*scale). We are on the opening paren.
    409 	p.registerIndirect(a, prefix)
    410 	// fmt.Printf("DONE %s\n", p.arch.Dconv(&emptyProg, 0, a))
    411 
    412 	p.expectOperandEnd()
    413 	return
    414 }
    415 
    416 // atStartOfRegister reports whether the parser is at the start of a register definition.
    417 func (p *Parser) atStartOfRegister(name string) bool {
    418 	// Simple register: R10.
    419 	_, present := p.arch.Register[name]
    420 	if present {
    421 		return true
    422 	}
    423 	// Parenthesized register: R(10).
    424 	return p.arch.RegisterPrefix[name] && p.peek() == '('
    425 }
    426 
    427 // atRegisterShift reports whether we are at the start of an ARM shifted register.
    428 // We have consumed the register or R prefix.
    429 func (p *Parser) atRegisterShift() bool {
    430 	// ARM only.
    431 	if !p.arch.InFamily(sys.ARM, sys.ARM64) {
    432 		return false
    433 	}
    434 	// R1<<...
    435 	if lex.IsRegisterShift(p.peek()) {
    436 		return true
    437 	}
    438 	// R(1)<<...   Ugly check. TODO: Rethink how we handle ARM register shifts to be
    439 	// less special.
    440 	if p.peek() != '(' || len(p.input)-p.inputPos < 4 {
    441 		return false
    442 	}
    443 	return p.at('(', scanner.Int, ')') && lex.IsRegisterShift(p.input[p.inputPos+3].ScanToken)
    444 }
    445 
    446 // atRegisterExtension reports whether we are at the start of an ARM64 extended register.
    447 // We have consumed the register or R prefix.
    448 func (p *Parser) atRegisterExtension() bool {
    449 	// ARM64 only.
    450 	if p.arch.Family != sys.ARM64 {
    451 		return false
    452 	}
    453 	// R1.xxx
    454 	if p.peek() == '.' {
    455 		return true
    456 	}
    457 	return false
    458 }
    459 
    460 // registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10).
    461 func (p *Parser) registerReference(name string) (int16, bool) {
    462 	r, present := p.arch.Register[name]
    463 	if present {
    464 		return r, true
    465 	}
    466 	if !p.arch.RegisterPrefix[name] {
    467 		p.errorf("expected register; found %s", name)
    468 		return 0, false
    469 	}
    470 	p.get('(')
    471 	tok := p.get(scanner.Int)
    472 	num, err := strconv.ParseInt(tok.String(), 10, 16)
    473 	p.get(')')
    474 	if err != nil {
    475 		p.errorf("parsing register list: %s", err)
    476 		return 0, false
    477 	}
    478 	r, ok := p.arch.RegisterNumber(name, int16(num))
    479 	if !ok {
    480 		p.errorf("illegal register %s(%d)", name, r)
    481 		return 0, false
    482 	}
    483 	return r, true
    484 }
    485 
    486 // register parses a full register reference where there is no symbol present (as in 4(R0) or R(10) but not sym(SB))
    487 // including forms involving multiple registers such as R1:R2.
    488 func (p *Parser) register(name string, prefix rune) (r1, r2 int16, scale int8, ok bool) {
    489 	// R1 or R(1) R1:R2 R1,R2 R1+R2, or R1*scale.
    490 	r1, ok = p.registerReference(name)
    491 	if !ok {
    492 		return
    493 	}
    494 	if prefix != 0 && prefix != '*' { // *AX is OK.
    495 		p.errorf("prefix %c not allowed for register: %c%s", prefix, prefix, name)
    496 	}
    497 	c := p.peek()
    498 	if c == ':' || c == ',' || c == '+' {
    499 		// 2nd register; syntax (R1+R2) etc. No two architectures agree.
    500 		// Check the architectures match the syntax.
    501 		switch p.next().ScanToken {
    502 		case ',':
    503 			if !p.arch.InFamily(sys.ARM, sys.ARM64) {
    504 				p.errorf("(register,register) not supported on this architecture")
    505 				return
    506 			}
    507 		case '+':
    508 			if p.arch.Family != sys.PPC64 {
    509 				p.errorf("(register+register) not supported on this architecture")
    510 				return
    511 			}
    512 		}
    513 		name := p.next().String()
    514 		r2, ok = p.registerReference(name)
    515 		if !ok {
    516 			return
    517 		}
    518 	}
    519 	if p.peek() == '*' {
    520 		// Scale
    521 		p.next()
    522 		scale = p.parseScale(p.next().String())
    523 	}
    524 	return r1, r2, scale, true
    525 }
    526 
    527 // registerShift parses an ARM/ARM64 shifted register reference and returns the encoded representation.
    528 // There is known to be a register (current token) and a shift operator (peeked token).
    529 func (p *Parser) registerShift(name string, prefix rune) int64 {
    530 	if prefix != 0 {
    531 		p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name)
    532 	}
    533 	// R1 op R2 or r1 op constant.
    534 	// op is:
    535 	//	"<<" == 0
    536 	//	">>" == 1
    537 	//	"->" == 2
    538 	//	"@>" == 3
    539 	r1, ok := p.registerReference(name)
    540 	if !ok {
    541 		return 0
    542 	}
    543 	var op int16
    544 	switch p.next().ScanToken {
    545 	case lex.LSH:
    546 		op = 0
    547 	case lex.RSH:
    548 		op = 1
    549 	case lex.ARR:
    550 		op = 2
    551 	case lex.ROT:
    552 		// following instructions on ARM64 support rotate right
    553 		// AND, ANDS, TST, BIC, BICS, EON, EOR, ORR, MVN, ORN
    554 		op = 3
    555 	}
    556 	tok := p.next()
    557 	str := tok.String()
    558 	var count int16
    559 	switch tok.ScanToken {
    560 	case scanner.Ident:
    561 		if p.arch.Family == sys.ARM64 {
    562 			p.errorf("rhs of shift must be integer: %s", str)
    563 		} else {
    564 			r2, ok := p.registerReference(str)
    565 			if !ok {
    566 				p.errorf("rhs of shift must be register or integer: %s", str)
    567 			}
    568 			count = (r2&15)<<8 | 1<<4
    569 		}
    570 	case scanner.Int, '(':
    571 		p.back()
    572 		x := int64(p.expr())
    573 		if p.arch.Family == sys.ARM64 {
    574 			if x >= 64 {
    575 				p.errorf("register shift count too large: %s", str)
    576 			}
    577 			count = int16((x & 63) << 10)
    578 		} else {
    579 			if x >= 32 {
    580 				p.errorf("register shift count too large: %s", str)
    581 			}
    582 			count = int16((x & 31) << 7)
    583 		}
    584 	default:
    585 		p.errorf("unexpected %s in register shift", tok.String())
    586 	}
    587 	if p.arch.Family == sys.ARM64 {
    588 		return int64(int64(r1&31)<<16 | int64(op)<<22 | int64(uint16(count)))
    589 	} else {
    590 		return int64((r1 & 15) | op<<5 | count)
    591 	}
    592 }
    593 
    594 // registerExtension parses a register with extension or arrangment.
    595 // There is known to be a register (current token) and an extension operator (peeked token).
    596 func (p *Parser) registerExtension(a *obj.Addr, name string, prefix rune) {
    597 	if prefix != 0 {
    598 		p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name)
    599 	}
    600 
    601 	reg, ok := p.registerReference(name)
    602 	if !ok {
    603 		p.errorf("unexpected %s in register extension", name)
    604 		return
    605 	}
    606 
    607 	p.get('.')
    608 	tok := p.next()
    609 	ext := tok.String()
    610 	isIndex := false
    611 	num := int16(0)
    612 	isAmount := true // Amount is zero by default
    613 	if p.peek() == lex.LSH {
    614 		// parses left shift amount applied after extension: <<Amount
    615 		p.get(lex.LSH)
    616 		tok := p.get(scanner.Int)
    617 		amount, err := strconv.ParseInt(tok.String(), 10, 16)
    618 		if err != nil {
    619 			p.errorf("parsing left shift amount: %s", err)
    620 		}
    621 		num = int16(amount)
    622 	} else if p.peek() == '[' {
    623 		// parses an element: [Index]
    624 		p.get('[')
    625 		tok := p.get(scanner.Int)
    626 		index, err := strconv.ParseInt(tok.String(), 10, 16)
    627 		p.get(']')
    628 		if err != nil {
    629 			p.errorf("parsing element index: %s", err)
    630 		}
    631 		isIndex = true
    632 		isAmount = false
    633 		num = int16(index)
    634 	}
    635 
    636 	switch p.arch.Family {
    637 	case sys.ARM64:
    638 		err := arch.ARM64RegisterExtension(a, ext, reg, num, isAmount, isIndex)
    639 		if err != nil {
    640 			p.errorf(err.Error())
    641 		}
    642 	default:
    643 		p.errorf("register extension not supported on this architecture")
    644 	}
    645 }
    646 
    647 // symbolReference parses a symbol that is known not to be a register.
    648 func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) {
    649 	// Identifier is a name.
    650 	switch prefix {
    651 	case 0:
    652 		a.Type = obj.TYPE_MEM
    653 	case '$':
    654 		a.Type = obj.TYPE_ADDR
    655 	case '*':
    656 		a.Type = obj.TYPE_INDIR
    657 	}
    658 	// Weirdness with statics: Might now have "<>".
    659 	isStatic := false
    660 	if p.peek() == '<' {
    661 		isStatic = true
    662 		p.next()
    663 		p.get('>')
    664 	}
    665 	if p.peek() == '+' || p.peek() == '-' {
    666 		a.Offset = int64(p.expr())
    667 	}
    668 	if isStatic {
    669 		a.Sym = p.ctxt.LookupStatic(name)
    670 	} else {
    671 		a.Sym = p.ctxt.Lookup(name)
    672 	}
    673 	if p.peek() == scanner.EOF {
    674 		if prefix == 0 && p.isJump {
    675 			// Symbols without prefix or suffix are jump labels.
    676 			return
    677 		}
    678 		p.errorf("illegal or missing addressing mode for symbol %s", name)
    679 		return
    680 	}
    681 	// Expect (SB), (FP), (PC), or (SP)
    682 	p.get('(')
    683 	reg := p.get(scanner.Ident).String()
    684 	p.get(')')
    685 	p.setPseudoRegister(a, reg, isStatic, prefix)
    686 }
    687 
    688 // setPseudoRegister sets the NAME field of addr for a pseudo-register reference such as (SB).
    689 func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, prefix rune) {
    690 	if addr.Reg != 0 {
    691 		p.errorf("internal error: reg %s already set in pseudo", reg)
    692 	}
    693 	switch reg {
    694 	case "FP":
    695 		addr.Name = obj.NAME_PARAM
    696 	case "PC":
    697 		if prefix != 0 {
    698 			p.errorf("illegal addressing mode for PC")
    699 		}
    700 		addr.Type = obj.TYPE_BRANCH // We set the type and leave NAME untouched. See asmJump.
    701 	case "SB":
    702 		addr.Name = obj.NAME_EXTERN
    703 		if isStatic {
    704 			addr.Name = obj.NAME_STATIC
    705 		}
    706 	case "SP":
    707 		addr.Name = obj.NAME_AUTO // The pseudo-stack.
    708 	default:
    709 		p.errorf("expected pseudo-register; found %s", reg)
    710 	}
    711 	if prefix == '$' {
    712 		addr.Type = obj.TYPE_ADDR
    713 	}
    714 }
    715 
    716 // registerIndirect parses the general form of a register indirection.
    717 // It is can be (R1), (R2*scale), or (R1)(R2*scale) where R1 may be a simple
    718 // register or register pair R:R or (R, R) or (R+R).
    719 // Or it might be a pseudo-indirection like (FP).
    720 // We are sitting on the opening parenthesis.
    721 func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) {
    722 	p.get('(')
    723 	tok := p.next()
    724 	name := tok.String()
    725 	r1, r2, scale, ok := p.register(name, 0)
    726 	if !ok {
    727 		p.errorf("indirect through non-register %s", tok)
    728 	}
    729 	p.get(')')
    730 	a.Type = obj.TYPE_MEM
    731 	if r1 < 0 {
    732 		// Pseudo-register reference.
    733 		if r2 != 0 {
    734 			p.errorf("cannot use pseudo-register in pair")
    735 			return
    736 		}
    737 		// For SB, SP, and FP, there must be a name here. 0(FP) is not legal.
    738 		if name != "PC" && a.Name == obj.NAME_NONE {
    739 			p.errorf("cannot reference %s without a symbol", name)
    740 		}
    741 		p.setPseudoRegister(a, name, false, prefix)
    742 		return
    743 	}
    744 	a.Reg = r1
    745 	if r2 != 0 {
    746 		// TODO: Consistency in the encoding would be nice here.
    747 		if p.arch.InFamily(sys.ARM, sys.ARM64) {
    748 			// Special form
    749 			// ARM: destination register pair (R1, R2).
    750 			// ARM64: register pair (R1, R2) for LDP/STP.
    751 			if prefix != 0 || scale != 0 {
    752 				p.errorf("illegal address mode for register pair")
    753 				return
    754 			}
    755 			a.Type = obj.TYPE_REGREG
    756 			a.Offset = int64(r2)
    757 			// Nothing may follow
    758 			return
    759 		}
    760 		if p.arch.Family == sys.PPC64 {
    761 			// Special form for PPC64: (R1+R2); alias for (R1)(R2*1).
    762 			if prefix != 0 || scale != 0 {
    763 				p.errorf("illegal address mode for register+register")
    764 				return
    765 			}
    766 			a.Type = obj.TYPE_MEM
    767 			a.Scale = 1
    768 			a.Index = r2
    769 			// Nothing may follow.
    770 			return
    771 		}
    772 	}
    773 	if r2 != 0 {
    774 		p.errorf("indirect through register pair")
    775 	}
    776 	if prefix == '$' {
    777 		a.Type = obj.TYPE_ADDR
    778 	}
    779 	if r1 == arch.RPC && prefix != 0 {
    780 		p.errorf("illegal addressing mode for PC")
    781 	}
    782 	if scale == 0 && p.peek() == '(' {
    783 		// General form (R)(R*scale).
    784 		p.next()
    785 		tok := p.next()
    786 		r1, r2, scale, ok = p.register(tok.String(), 0)
    787 		if !ok {
    788 			p.errorf("indirect through non-register %s", tok)
    789 		}
    790 		if r2 != 0 {
    791 			p.errorf("unimplemented two-register form")
    792 		}
    793 		a.Index = r1
    794 		if scale == 0 && p.arch.Family == sys.ARM64 {
    795 			// scale is 1 by default for ARM64
    796 			a.Scale = 1
    797 		} else {
    798 			a.Scale = int16(scale)
    799 		}
    800 		p.get(')')
    801 	} else if scale != 0 {
    802 		// First (R) was missing, all we have is (R*scale).
    803 		a.Reg = 0
    804 		a.Index = r1
    805 		a.Scale = int16(scale)
    806 	}
    807 }
    808 
    809 // registerList parses an ARM or ARM64 register list expression, a list of
    810 // registers in []. There may be comma-separated ranges or individual
    811 // registers, as in [R1,R3-R5] or [V1.S4, V2.S4, V3.S4, V4.S4].
    812 // For ARM, only R0 through R15 may appear.
    813 // For ARM64, V0 through V31 with arrangement may appear.
    814 // The opening bracket has been consumed.
    815 func (p *Parser) registerList(a *obj.Addr) {
    816 	// One range per loop.
    817 	var maxReg int
    818 	var bits uint16
    819 	var arrangement int64
    820 	switch p.arch.Family {
    821 	case sys.ARM:
    822 		maxReg = 16
    823 	case sys.ARM64:
    824 		maxReg = 32
    825 	default:
    826 		p.errorf("unexpected register list")
    827 	}
    828 	firstReg := -1
    829 	nextReg := -1
    830 	regCnt := 0
    831 ListLoop:
    832 	for {
    833 		tok := p.next()
    834 		switch tok.ScanToken {
    835 		case ']':
    836 			break ListLoop
    837 		case scanner.EOF:
    838 			p.errorf("missing ']' in register list")
    839 			return
    840 		}
    841 		switch p.arch.Family {
    842 		case sys.ARM64:
    843 			// Vn.T
    844 			name := tok.String()
    845 			r, ok := p.registerReference(name)
    846 			if !ok {
    847 				p.errorf("invalid register: %s", name)
    848 			}
    849 			reg := r - p.arch.Register["V0"]
    850 			p.get('.')
    851 			tok := p.next()
    852 			ext := tok.String()
    853 			curArrangement, err := arch.ARM64RegisterArrangement(reg, name, ext)
    854 			if err != nil {
    855 				p.errorf(err.Error())
    856 			}
    857 			if firstReg == -1 {
    858 				// only record the first register and arrangement
    859 				firstReg = int(reg)
    860 				nextReg = firstReg
    861 				arrangement = curArrangement
    862 			} else if curArrangement != arrangement {
    863 				p.errorf("inconsistent arrangement in ARM64 register list")
    864 			} else if nextReg != int(reg) {
    865 				p.errorf("incontiguous register in ARM64 register list: %s", name)
    866 			}
    867 			regCnt++
    868 			nextReg = (nextReg + 1) % 32
    869 		case sys.ARM:
    870 			// Parse the upper and lower bounds.
    871 			lo := p.registerNumber(tok.String())
    872 			hi := lo
    873 			if p.peek() == '-' {
    874 				p.next()
    875 				hi = p.registerNumber(p.next().String())
    876 			}
    877 			if hi < lo {
    878 				lo, hi = hi, lo
    879 			}
    880 			// Check there are no duplicates in the register list.
    881 			for i := 0; lo <= hi && i < maxReg; i++ {
    882 				if bits&(1<<lo) != 0 {
    883 					p.errorf("register R%d already in list", lo)
    884 				}
    885 				bits |= 1 << lo
    886 				lo++
    887 			}
    888 		default:
    889 			p.errorf("unexpected register list")
    890 		}
    891 		if p.peek() != ']' {
    892 			p.get(',')
    893 		}
    894 	}
    895 	a.Type = obj.TYPE_REGLIST
    896 	switch p.arch.Family {
    897 	case sys.ARM:
    898 		a.Offset = int64(bits)
    899 	case sys.ARM64:
    900 		offset, err := arch.ARM64RegisterListOffset(firstReg, regCnt, arrangement)
    901 		if err != nil {
    902 			p.errorf(err.Error())
    903 		}
    904 		a.Offset = offset
    905 	default:
    906 		p.errorf("register list not supported on this architecuture")
    907 	}
    908 }
    909 
    910 // register number is ARM-specific. It returns the number of the specified register.
    911 func (p *Parser) registerNumber(name string) uint16 {
    912 	if p.arch.Family == sys.ARM && name == "g" {
    913 		return 10
    914 	}
    915 	if name[0] != 'R' {
    916 		p.errorf("expected g or R0 through R15; found %s", name)
    917 		return 0
    918 	}
    919 	r, ok := p.registerReference(name)
    920 	if !ok {
    921 		return 0
    922 	}
    923 	reg := r - p.arch.Register["R0"]
    924 	if reg < 0 {
    925 		// Could happen for an architecture having other registers prefixed by R
    926 		p.errorf("expected g or R0 through R15; found %s", name)
    927 		return 0
    928 	}
    929 	return uint16(reg)
    930 }
    931 
    932 // Note: There are two changes in the expression handling here
    933 // compared to the old yacc/C implementations. Neither has
    934 // much practical consequence because the expressions we
    935 // see in assembly code are simple, but for the record:
    936 //
    937 // 1) Evaluation uses uint64; the old one used int64.
    938 // 2) Precedence uses Go rules not C rules.
    939 
    940 // expr = term | term ('+' | '-' | '|' | '^') term.
    941 func (p *Parser) expr() uint64 {
    942 	value := p.term()
    943 	for {
    944 		switch p.peek() {
    945 		case '+':
    946 			p.next()
    947 			value += p.term()
    948 		case '-':
    949 			p.next()
    950 			value -= p.term()
    951 		case '|':
    952 			p.next()
    953 			value |= p.term()
    954 		case '^':
    955 			p.next()
    956 			value ^= p.term()
    957 		default:
    958 			return value
    959 		}
    960 	}
    961 }
    962 
    963 // floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')'
    964 func (p *Parser) floatExpr() float64 {
    965 	tok := p.next()
    966 	switch tok.ScanToken {
    967 	case '(':
    968 		v := p.floatExpr()
    969 		if p.next().ScanToken != ')' {
    970 			p.errorf("missing closing paren")
    971 		}
    972 		return v
    973 	case '+':
    974 		return +p.floatExpr()
    975 	case '-':
    976 		return -p.floatExpr()
    977 	case scanner.Float:
    978 		return p.atof(tok.String())
    979 	}
    980 	p.errorf("unexpected %s evaluating float expression", tok)
    981 	return 0
    982 }
    983 
    984 // term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor
    985 func (p *Parser) term() uint64 {
    986 	value := p.factor()
    987 	for {
    988 		switch p.peek() {
    989 		case '*':
    990 			p.next()
    991 			value *= p.factor()
    992 		case '/':
    993 			p.next()
    994 			if int64(value) < 0 {
    995 				p.errorf("divide of value with high bit set")
    996 			}
    997 			divisor := p.factor()
    998 			if divisor == 0 {
    999 				p.errorf("division by zero")
   1000 			} else {
   1001 				value /= divisor
   1002 			}
   1003 		case '%':
   1004 			p.next()
   1005 			divisor := p.factor()
   1006 			if int64(value) < 0 {
   1007 				p.errorf("modulo of value with high bit set")
   1008 			}
   1009 			if divisor == 0 {
   1010 				p.errorf("modulo by zero")
   1011 			} else {
   1012 				value %= divisor
   1013 			}
   1014 		case lex.LSH:
   1015 			p.next()
   1016 			shift := p.factor()
   1017 			if int64(shift) < 0 {
   1018 				p.errorf("negative left shift count")
   1019 			}
   1020 			return value << shift
   1021 		case lex.RSH:
   1022 			p.next()
   1023 			shift := p.term()
   1024 			if int64(shift) < 0 {
   1025 				p.errorf("negative right shift count")
   1026 			}
   1027 			if int64(value) < 0 {
   1028 				p.errorf("right shift of value with high bit set")
   1029 			}
   1030 			value >>= shift
   1031 		case '&':
   1032 			p.next()
   1033 			value &= p.factor()
   1034 		default:
   1035 			return value
   1036 		}
   1037 	}
   1038 }
   1039 
   1040 // factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')'
   1041 func (p *Parser) factor() uint64 {
   1042 	tok := p.next()
   1043 	switch tok.ScanToken {
   1044 	case scanner.Int:
   1045 		return p.atoi(tok.String())
   1046 	case scanner.Char:
   1047 		str, err := strconv.Unquote(tok.String())
   1048 		if err != nil {
   1049 			p.errorf("%s", err)
   1050 		}
   1051 		r, w := utf8.DecodeRuneInString(str)
   1052 		if w == 1 && r == utf8.RuneError {
   1053 			p.errorf("illegal UTF-8 encoding for character constant")
   1054 		}
   1055 		return uint64(r)
   1056 	case '+':
   1057 		return +p.factor()
   1058 	case '-':
   1059 		return -p.factor()
   1060 	case '~':
   1061 		return ^p.factor()
   1062 	case '(':
   1063 		v := p.expr()
   1064 		if p.next().ScanToken != ')' {
   1065 			p.errorf("missing closing paren")
   1066 		}
   1067 		return v
   1068 	}
   1069 	p.errorf("unexpected %s evaluating expression", tok)
   1070 	return 0
   1071 }
   1072 
   1073 // positiveAtoi returns an int64 that must be >= 0.
   1074 func (p *Parser) positiveAtoi(str string) int64 {
   1075 	value, err := strconv.ParseInt(str, 0, 64)
   1076 	if err != nil {
   1077 		p.errorf("%s", err)
   1078 	}
   1079 	if value < 0 {
   1080 		p.errorf("%s overflows int64", str)
   1081 	}
   1082 	return value
   1083 }
   1084 
   1085 func (p *Parser) atoi(str string) uint64 {
   1086 	value, err := strconv.ParseUint(str, 0, 64)
   1087 	if err != nil {
   1088 		p.errorf("%s", err)
   1089 	}
   1090 	return value
   1091 }
   1092 
   1093 func (p *Parser) atof(str string) float64 {
   1094 	value, err := strconv.ParseFloat(str, 64)
   1095 	if err != nil {
   1096 		p.errorf("%s", err)
   1097 	}
   1098 	return value
   1099 }
   1100 
// EOF represents the end of input. It is the token that next returns once
// every token of the current input has been consumed; its ScanToken is
// scanner.EOF and its text is "EOF".
var EOF = lex.Make(scanner.EOF, "EOF")
   1103 
   1104 func (p *Parser) next() lex.Token {
   1105 	if !p.more() {
   1106 		return EOF
   1107 	}
   1108 	tok := p.input[p.inputPos]
   1109 	p.inputPos++
   1110 	return tok
   1111 }
   1112 
   1113 func (p *Parser) back() {
   1114 	if p.inputPos == 0 {
   1115 		p.errorf("internal error: backing up before BOL")
   1116 	} else {
   1117 		p.inputPos--
   1118 	}
   1119 }
   1120 
   1121 func (p *Parser) peek() lex.ScanToken {
   1122 	if p.more() {
   1123 		return p.input[p.inputPos].ScanToken
   1124 	}
   1125 	return scanner.EOF
   1126 }
   1127 
   1128 func (p *Parser) more() bool {
   1129 	return p.inputPos < len(p.input)
   1130 }
   1131 
   1132 // get verifies that the next item has the expected type and returns it.
   1133 func (p *Parser) get(expected lex.ScanToken) lex.Token {
   1134 	p.expect(expected, expected.String())
   1135 	return p.next()
   1136 }
   1137 
   1138 // expectOperandEnd verifies that the parsing state is properly at the end of an operand.
   1139 func (p *Parser) expectOperandEnd() {
   1140 	p.expect(scanner.EOF, "end of operand")
   1141 }
   1142 
   1143 // expect verifies that the next item has the expected type. It does not consume it.
   1144 func (p *Parser) expect(expectedToken lex.ScanToken, expectedMessage string) {
   1145 	if p.peek() != expectedToken {
   1146 		p.errorf("expected %s, found %s", expectedMessage, p.next())
   1147 	}
   1148 }
   1149 
   1150 // have reports whether the remaining tokens (including the current one) contain the specified token.
   1151 func (p *Parser) have(token lex.ScanToken) bool {
   1152 	for i := p.inputPos; i < len(p.input); i++ {
   1153 		if p.input[i].ScanToken == token {
   1154 			return true
   1155 		}
   1156 	}
   1157 	return false
   1158 }
   1159 
   1160 // at reports whether the next tokens are as requested.
   1161 func (p *Parser) at(next ...lex.ScanToken) bool {
   1162 	if len(p.input)-p.inputPos < len(next) {
   1163 		return false
   1164 	}
   1165 	for i, r := range next {
   1166 		if p.input[p.inputPos+i].ScanToken != r {
   1167 			return false
   1168 		}
   1169 	}
   1170 	return true
   1171 }
   1172