1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package asm implements the parser and instruction generator for the assembler. 6 // TODO: Split apart? 7 package asm 8 9 import ( 10 "fmt" 11 "io" 12 "log" 13 "os" 14 "strconv" 15 "text/scanner" 16 "unicode/utf8" 17 18 "cmd/asm/internal/arch" 19 "cmd/asm/internal/flags" 20 "cmd/asm/internal/lex" 21 "cmd/internal/obj" 22 "cmd/internal/src" 23 "cmd/internal/sys" 24 ) 25 26 type Parser struct { 27 lex lex.TokenReader 28 lineNum int // Line number in source file. 29 errorLine int // Line number of last error. 30 errorCount int // Number of errors. 31 pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. 32 input []lex.Token 33 inputPos int 34 pendingLabels []string // Labels to attach to next instruction. 35 labels map[string]*obj.Prog 36 toPatch []Patch 37 addr []obj.Addr 38 arch *arch.Arch 39 ctxt *obj.Link 40 firstProg *obj.Prog 41 lastProg *obj.Prog 42 dataAddr map[string]int64 // Most recent address for DATA for this symbol. 43 isJump bool // Instruction being assembled is a jump. 44 errorWriter io.Writer 45 } 46 47 type Patch struct { 48 prog *obj.Prog 49 label string 50 } 51 52 func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser { 53 return &Parser{ 54 ctxt: ctxt, 55 arch: ar, 56 lex: lexer, 57 labels: make(map[string]*obj.Prog), 58 dataAddr: make(map[string]int64), 59 errorWriter: os.Stderr, 60 } 61 } 62 63 // panicOnError is enabled when testing to abort execution on the first error 64 // and turn it into a recoverable panic. 65 var panicOnError bool 66 67 func (p *Parser) errorf(format string, args ...interface{}) { 68 if panicOnError { 69 panic(fmt.Errorf(format, args...)) 70 } 71 if p.lineNum == p.errorLine { 72 // Only one error per line. 73 return 74 } 75 p.errorLine = p.lineNum 76 if p.lex != nil { 77 // Put file and line information on head of message. 78 format = "%s:%d: " + format + "\n" 79 args = append([]interface{}{p.lex.File(), p.lineNum}, args...) 80 } 81 fmt.Fprintf(p.errorWriter, format, args...) 82 p.errorCount++ 83 if p.errorCount > 10 && !*flags.AllErrors { 84 log.Fatal("too many errors") 85 } 86 } 87 88 func (p *Parser) pos() src.XPos { 89 return p.ctxt.PosTable.XPos(src.MakePos(p.lex.Base(), uint(p.lineNum), 0)) 90 } 91 92 func (p *Parser) Parse() (*obj.Prog, bool) { 93 for p.line() { 94 } 95 if p.errorCount > 0 { 96 return nil, false 97 } 98 p.patch() 99 return p.firstProg, true 100 } 101 102 // WORD [ arg {, arg} ] (';' | '\n') 103 func (p *Parser) line() bool { 104 // Skip newlines. 105 var tok lex.ScanToken 106 for { 107 tok = p.lex.Next() 108 // We save the line number here so error messages from this instruction 109 // are labeled with this line. Otherwise we complain after we've absorbed 110 // the terminating newline and the line numbers are off by one in errors. 111 p.lineNum = p.lex.Line() 112 switch tok { 113 case '\n', ';': 114 continue 115 case scanner.EOF: 116 return false 117 } 118 break 119 } 120 // First item must be an identifier. 121 if tok != scanner.Ident { 122 p.errorf("expected identifier, found %q", p.lex.Text()) 123 return false // Might as well stop now. 124 } 125 word := p.lex.Text() 126 var cond string 127 operands := make([][]lex.Token, 0, 3) 128 // Zero or more comma-separated operands, one per loop. 129 nesting := 0 130 colon := -1 131 for tok != '\n' && tok != ';' { 132 // Process one operand. 133 items := make([]lex.Token, 0, 3) 134 for { 135 tok = p.lex.Next() 136 if len(operands) == 0 && len(items) == 0 { 137 if p.arch.InFamily(sys.ARM, sys.ARM64) && tok == '.' { 138 // ARM conditionals. 139 tok = p.lex.Next() 140 str := p.lex.Text() 141 if tok != scanner.Ident { 142 p.errorf("ARM condition expected identifier, found %s", str) 143 } 144 cond = cond + "." + str 145 continue 146 } 147 if tok == ':' { 148 // Labels. 149 p.pendingLabels = append(p.pendingLabels, word) 150 return true 151 } 152 } 153 if tok == scanner.EOF { 154 p.errorf("unexpected EOF") 155 return false 156 } 157 // Split operands on comma. Also, the old syntax on x86 for a "register pair" 158 // was AX:DX, for which the new syntax is DX, AX. Note the reordering. 159 if tok == '\n' || tok == ';' || (nesting == 0 && (tok == ',' || tok == ':')) { 160 if tok == ':' { 161 // Remember this location so we can swap the operands below. 162 if colon >= 0 { 163 p.errorf("invalid ':' in operand") 164 return true 165 } 166 colon = len(operands) 167 } 168 break 169 } 170 if tok == '(' || tok == '[' { 171 nesting++ 172 } 173 if tok == ')' || tok == ']' { 174 nesting-- 175 } 176 items = append(items, lex.Make(tok, p.lex.Text())) 177 } 178 if len(items) > 0 { 179 operands = append(operands, items) 180 if colon >= 0 && len(operands) == colon+2 { 181 // AX:DX becomes DX, AX. 182 operands[colon], operands[colon+1] = operands[colon+1], operands[colon] 183 colon = -1 184 } 185 } else if len(operands) > 0 || tok == ',' || colon >= 0 { 186 // Had a separator with nothing after. 187 p.errorf("missing operand") 188 } 189 } 190 if p.pseudo(word, operands) { 191 return true 192 } 193 i, present := p.arch.Instructions[word] 194 if present { 195 p.instruction(i, word, cond, operands) 196 return true 197 } 198 p.errorf("unrecognized instruction %q", word) 199 return true 200 } 201 202 func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) { 203 p.addr = p.addr[0:0] 204 p.isJump = p.arch.IsJump(word) 205 for _, op := range operands { 206 addr := p.address(op) 207 if !p.isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo. 208 p.errorf("illegal use of pseudo-register in %s", word) 209 } 210 p.addr = append(p.addr, addr) 211 } 212 if p.isJump { 213 p.asmJump(op, cond, p.addr) 214 return 215 } 216 p.asmInstruction(op, cond, p.addr) 217 } 218 219 func (p *Parser) pseudo(word string, operands [][]lex.Token) bool { 220 switch word { 221 case "DATA": 222 p.asmData(word, operands) 223 case "FUNCDATA": 224 p.asmFuncData(word, operands) 225 case "GLOBL": 226 p.asmGlobl(word, operands) 227 case "PCDATA": 228 p.asmPCData(word, operands) 229 case "TEXT": 230 p.asmText(word, operands) 231 default: 232 return false 233 } 234 return true 235 } 236 237 func (p *Parser) start(operand []lex.Token) { 238 p.input = operand 239 p.inputPos = 0 240 } 241 242 // address parses the operand into a link address structure. 243 func (p *Parser) address(operand []lex.Token) obj.Addr { 244 p.start(operand) 245 addr := obj.Addr{} 246 p.operand(&addr) 247 return addr 248 } 249 250 // parseScale converts a decimal string into a valid scale factor. 251 func (p *Parser) parseScale(s string) int8 { 252 switch s { 253 case "1", "2", "4", "8": 254 return int8(s[0] - '0') 255 } 256 p.errorf("bad scale: %s", s) 257 return 0 258 } 259 260 // operand parses a general operand and stores the result in *a. 261 func (p *Parser) operand(a *obj.Addr) { 262 //fmt.Printf("Operand: %v\n", p.input) 263 if len(p.input) == 0 { 264 p.errorf("empty operand: cannot happen") 265 return 266 } 267 // General address (with a few exceptions) looks like 268 // $symoffset(SB)(reg)(index*scale) 269 // Exceptions are: 270 // 271 // R1 272 // offset 273 // $offset 274 // Every piece is optional, so we scan left to right and what 275 // we discover tells us where we are. 276 277 // Prefix: $. 278 var prefix rune 279 switch tok := p.peek(); tok { 280 case '$', '*': 281 prefix = rune(tok) 282 p.next() 283 } 284 285 // Symbol: symoffset(SB) 286 tok := p.next() 287 name := tok.String() 288 if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) { 289 // We have a symbol. Parse $symoffset(symkind) 290 p.symbolReference(a, name, prefix) 291 // fmt.Printf("SYM %s\n", obj.Dconv(&emptyProg, 0, a)) 292 if p.peek() == scanner.EOF { 293 return 294 } 295 } 296 297 // Special register list syntax for arm: [R1,R3-R7] 298 if tok.ScanToken == '[' { 299 if prefix != 0 { 300 p.errorf("illegal use of register list") 301 } 302 p.registerList(a) 303 p.expectOperandEnd() 304 return 305 } 306 307 // Register: R1 308 if tok.ScanToken == scanner.Ident && p.atStartOfRegister(name) { 309 if p.atRegisterShift() { 310 // ARM shifted register such as R1<<R2 or R1>>2. 311 a.Type = obj.TYPE_SHIFT 312 a.Offset = p.registerShift(tok.String(), prefix) 313 if p.peek() == '(' { 314 // Can only be a literal register here. 315 p.next() 316 tok := p.next() 317 name := tok.String() 318 if !p.atStartOfRegister(name) { 319 p.errorf("expected register; found %s", name) 320 } 321 a.Reg, _ = p.registerReference(name) 322 p.get(')') 323 } 324 } else if p.atRegisterExtension() { 325 p.registerExtension(a, tok.String(), prefix) 326 p.expectOperandEnd() 327 return 328 } else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok { 329 if scale != 0 { 330 p.errorf("expected simple register reference") 331 } 332 a.Type = obj.TYPE_REG 333 a.Reg = r1 334 if r2 != 0 { 335 // Form is R1:R2. It is on RHS and the second register 336 // needs to go into the LHS. 337 panic("cannot happen (Addr.Reg2)") 338 } 339 } 340 // fmt.Printf("REG %s\n", obj.Dconv(&emptyProg, 0, a)) 341 p.expectOperandEnd() 342 return 343 } 344 345 // Constant. 346 haveConstant := false 347 switch tok.ScanToken { 348 case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~': 349 haveConstant = true 350 case '(': 351 // Could be parenthesized expression or (R). Must be something, though. 352 tok := p.next() 353 if tok.ScanToken == scanner.EOF { 354 p.errorf("missing right parenthesis") 355 return 356 } 357 rname := tok.String() 358 p.back() 359 haveConstant = !p.atStartOfRegister(rname) 360 if !haveConstant { 361 p.back() // Put back the '('. 362 } 363 } 364 if haveConstant { 365 p.back() 366 if p.have(scanner.Float) { 367 if prefix != '$' { 368 p.errorf("floating-point constant must be an immediate") 369 } 370 a.Type = obj.TYPE_FCONST 371 a.Val = p.floatExpr() 372 // fmt.Printf("FCONST %s\n", obj.Dconv(&emptyProg, 0, a)) 373 p.expectOperandEnd() 374 return 375 } 376 if p.have(scanner.String) { 377 if prefix != '$' { 378 p.errorf("string constant must be an immediate") 379 return 380 } 381 str, err := strconv.Unquote(p.get(scanner.String).String()) 382 if err != nil { 383 p.errorf("string parse error: %s", err) 384 } 385 a.Type = obj.TYPE_SCONST 386 a.Val = str 387 // fmt.Printf("SCONST %s\n", obj.Dconv(&emptyProg, 0, a)) 388 p.expectOperandEnd() 389 return 390 } 391 a.Offset = int64(p.expr()) 392 if p.peek() != '(' { 393 switch prefix { 394 case '$': 395 a.Type = obj.TYPE_CONST 396 case '*': 397 a.Type = obj.TYPE_INDIR // Can appear but is illegal, will be rejected by the linker. 398 default: 399 a.Type = obj.TYPE_MEM 400 } 401 // fmt.Printf("CONST %d %s\n", a.Offset, obj.Dconv(&emptyProg, 0, a)) 402 p.expectOperandEnd() 403 return 404 } 405 // fmt.Printf("offset %d \n", a.Offset) 406 } 407 408 // Register indirection: (reg) or (index*scale). We are on the opening paren. 409 p.registerIndirect(a, prefix) 410 // fmt.Printf("DONE %s\n", p.arch.Dconv(&emptyProg, 0, a)) 411 412 p.expectOperandEnd() 413 return 414 } 415 416 // atStartOfRegister reports whether the parser is at the start of a register definition. 417 func (p *Parser) atStartOfRegister(name string) bool { 418 // Simple register: R10. 419 _, present := p.arch.Register[name] 420 if present { 421 return true 422 } 423 // Parenthesized register: R(10). 424 return p.arch.RegisterPrefix[name] && p.peek() == '(' 425 } 426 427 // atRegisterShift reports whether we are at the start of an ARM shifted register. 428 // We have consumed the register or R prefix. 429 func (p *Parser) atRegisterShift() bool { 430 // ARM only. 431 if !p.arch.InFamily(sys.ARM, sys.ARM64) { 432 return false 433 } 434 // R1<<... 435 if lex.IsRegisterShift(p.peek()) { 436 return true 437 } 438 // R(1)<<... Ugly check. TODO: Rethink how we handle ARM register shifts to be 439 // less special. 440 if p.peek() != '(' || len(p.input)-p.inputPos < 4 { 441 return false 442 } 443 return p.at('(', scanner.Int, ')') && lex.IsRegisterShift(p.input[p.inputPos+3].ScanToken) 444 } 445 446 // atRegisterExtension reports whether we are at the start of an ARM64 extended register. 447 // We have consumed the register or R prefix. 448 func (p *Parser) atRegisterExtension() bool { 449 // ARM64 only. 450 if p.arch.Family != sys.ARM64 { 451 return false 452 } 453 // R1.xxx 454 if p.peek() == '.' { 455 return true 456 } 457 return false 458 } 459 460 // registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10). 461 func (p *Parser) registerReference(name string) (int16, bool) { 462 r, present := p.arch.Register[name] 463 if present { 464 return r, true 465 } 466 if !p.arch.RegisterPrefix[name] { 467 p.errorf("expected register; found %s", name) 468 return 0, false 469 } 470 p.get('(') 471 tok := p.get(scanner.Int) 472 num, err := strconv.ParseInt(tok.String(), 10, 16) 473 p.get(')') 474 if err != nil { 475 p.errorf("parsing register list: %s", err) 476 return 0, false 477 } 478 r, ok := p.arch.RegisterNumber(name, int16(num)) 479 if !ok { 480 p.errorf("illegal register %s(%d)", name, r) 481 return 0, false 482 } 483 return r, true 484 } 485 486 // register parses a full register reference where there is no symbol present (as in 4(R0) or R(10) but not sym(SB)) 487 // including forms involving multiple registers such as R1:R2. 488 func (p *Parser) register(name string, prefix rune) (r1, r2 int16, scale int8, ok bool) { 489 // R1 or R(1) R1:R2 R1,R2 R1+R2, or R1*scale. 490 r1, ok = p.registerReference(name) 491 if !ok { 492 return 493 } 494 if prefix != 0 && prefix != '*' { // *AX is OK. 495 p.errorf("prefix %c not allowed for register: %c%s", prefix, prefix, name) 496 } 497 c := p.peek() 498 if c == ':' || c == ',' || c == '+' { 499 // 2nd register; syntax (R1+R2) etc. No two architectures agree. 500 // Check the architectures match the syntax. 501 switch p.next().ScanToken { 502 case ',': 503 if !p.arch.InFamily(sys.ARM, sys.ARM64) { 504 p.errorf("(register,register) not supported on this architecture") 505 return 506 } 507 case '+': 508 if p.arch.Family != sys.PPC64 { 509 p.errorf("(register+register) not supported on this architecture") 510 return 511 } 512 } 513 name := p.next().String() 514 r2, ok = p.registerReference(name) 515 if !ok { 516 return 517 } 518 } 519 if p.peek() == '*' { 520 // Scale 521 p.next() 522 scale = p.parseScale(p.next().String()) 523 } 524 return r1, r2, scale, true 525 } 526 527 // registerShift parses an ARM/ARM64 shifted register reference and returns the encoded representation. 528 // There is known to be a register (current token) and a shift operator (peeked token). 529 func (p *Parser) registerShift(name string, prefix rune) int64 { 530 if prefix != 0 { 531 p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name) 532 } 533 // R1 op R2 or r1 op constant. 534 // op is: 535 // "<<" == 0 536 // ">>" == 1 537 // "->" == 2 538 // "@>" == 3 539 r1, ok := p.registerReference(name) 540 if !ok { 541 return 0 542 } 543 var op int16 544 switch p.next().ScanToken { 545 case lex.LSH: 546 op = 0 547 case lex.RSH: 548 op = 1 549 case lex.ARR: 550 op = 2 551 case lex.ROT: 552 // following instructions on ARM64 support rotate right 553 // AND, ANDS, TST, BIC, BICS, EON, EOR, ORR, MVN, ORN 554 op = 3 555 } 556 tok := p.next() 557 str := tok.String() 558 var count int16 559 switch tok.ScanToken { 560 case scanner.Ident: 561 if p.arch.Family == sys.ARM64 { 562 p.errorf("rhs of shift must be integer: %s", str) 563 } else { 564 r2, ok := p.registerReference(str) 565 if !ok { 566 p.errorf("rhs of shift must be register or integer: %s", str) 567 } 568 count = (r2&15)<<8 | 1<<4 569 } 570 case scanner.Int, '(': 571 p.back() 572 x := int64(p.expr()) 573 if p.arch.Family == sys.ARM64 { 574 if x >= 64 { 575 p.errorf("register shift count too large: %s", str) 576 } 577 count = int16((x & 63) << 10) 578 } else { 579 if x >= 32 { 580 p.errorf("register shift count too large: %s", str) 581 } 582 count = int16((x & 31) << 7) 583 } 584 default: 585 p.errorf("unexpected %s in register shift", tok.String()) 586 } 587 if p.arch.Family == sys.ARM64 { 588 return int64(int64(r1&31)<<16 | int64(op)<<22 | int64(uint16(count))) 589 } else { 590 return int64((r1 & 15) | op<<5 | count) 591 } 592 } 593 594 // registerExtension parses a register with extension or arrangment. 595 // There is known to be a register (current token) and an extension operator (peeked token). 596 func (p *Parser) registerExtension(a *obj.Addr, name string, prefix rune) { 597 if prefix != 0 { 598 p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name) 599 } 600 601 reg, ok := p.registerReference(name) 602 if !ok { 603 p.errorf("unexpected %s in register extension", name) 604 return 605 } 606 607 p.get('.') 608 tok := p.next() 609 ext := tok.String() 610 isIndex := false 611 num := int16(0) 612 isAmount := true // Amount is zero by default 613 if p.peek() == lex.LSH { 614 // parses left shift amount applied after extension: <<Amount 615 p.get(lex.LSH) 616 tok := p.get(scanner.Int) 617 amount, err := strconv.ParseInt(tok.String(), 10, 16) 618 if err != nil { 619 p.errorf("parsing left shift amount: %s", err) 620 } 621 num = int16(amount) 622 } else if p.peek() == '[' { 623 // parses an element: [Index] 624 p.get('[') 625 tok := p.get(scanner.Int) 626 index, err := strconv.ParseInt(tok.String(), 10, 16) 627 p.get(']') 628 if err != nil { 629 p.errorf("parsing element index: %s", err) 630 } 631 isIndex = true 632 isAmount = false 633 num = int16(index) 634 } 635 636 switch p.arch.Family { 637 case sys.ARM64: 638 err := arch.ARM64RegisterExtension(a, ext, reg, num, isAmount, isIndex) 639 if err != nil { 640 p.errorf(err.Error()) 641 } 642 default: 643 p.errorf("register extension not supported on this architecture") 644 } 645 } 646 647 // symbolReference parses a symbol that is known not to be a register. 648 func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) { 649 // Identifier is a name. 650 switch prefix { 651 case 0: 652 a.Type = obj.TYPE_MEM 653 case '$': 654 a.Type = obj.TYPE_ADDR 655 case '*': 656 a.Type = obj.TYPE_INDIR 657 } 658 // Weirdness with statics: Might now have "<>". 659 isStatic := false 660 if p.peek() == '<' { 661 isStatic = true 662 p.next() 663 p.get('>') 664 } 665 if p.peek() == '+' || p.peek() == '-' { 666 a.Offset = int64(p.expr()) 667 } 668 if isStatic { 669 a.Sym = p.ctxt.LookupStatic(name) 670 } else { 671 a.Sym = p.ctxt.Lookup(name) 672 } 673 if p.peek() == scanner.EOF { 674 if prefix == 0 && p.isJump { 675 // Symbols without prefix or suffix are jump labels. 676 return 677 } 678 p.errorf("illegal or missing addressing mode for symbol %s", name) 679 return 680 } 681 // Expect (SB), (FP), (PC), or (SP) 682 p.get('(') 683 reg := p.get(scanner.Ident).String() 684 p.get(')') 685 p.setPseudoRegister(a, reg, isStatic, prefix) 686 } 687 688 // setPseudoRegister sets the NAME field of addr for a pseudo-register reference such as (SB). 689 func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, prefix rune) { 690 if addr.Reg != 0 { 691 p.errorf("internal error: reg %s already set in pseudo", reg) 692 } 693 switch reg { 694 case "FP": 695 addr.Name = obj.NAME_PARAM 696 case "PC": 697 if prefix != 0 { 698 p.errorf("illegal addressing mode for PC") 699 } 700 addr.Type = obj.TYPE_BRANCH // We set the type and leave NAME untouched. See asmJump. 701 case "SB": 702 addr.Name = obj.NAME_EXTERN 703 if isStatic { 704 addr.Name = obj.NAME_STATIC 705 } 706 case "SP": 707 addr.Name = obj.NAME_AUTO // The pseudo-stack. 708 default: 709 p.errorf("expected pseudo-register; found %s", reg) 710 } 711 if prefix == '$' { 712 addr.Type = obj.TYPE_ADDR 713 } 714 } 715 716 // registerIndirect parses the general form of a register indirection. 717 // It is can be (R1), (R2*scale), or (R1)(R2*scale) where R1 may be a simple 718 // register or register pair R:R or (R, R) or (R+R). 719 // Or it might be a pseudo-indirection like (FP). 720 // We are sitting on the opening parenthesis. 721 func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) { 722 p.get('(') 723 tok := p.next() 724 name := tok.String() 725 r1, r2, scale, ok := p.register(name, 0) 726 if !ok { 727 p.errorf("indirect through non-register %s", tok) 728 } 729 p.get(')') 730 a.Type = obj.TYPE_MEM 731 if r1 < 0 { 732 // Pseudo-register reference. 733 if r2 != 0 { 734 p.errorf("cannot use pseudo-register in pair") 735 return 736 } 737 // For SB, SP, and FP, there must be a name here. 0(FP) is not legal. 738 if name != "PC" && a.Name == obj.NAME_NONE { 739 p.errorf("cannot reference %s without a symbol", name) 740 } 741 p.setPseudoRegister(a, name, false, prefix) 742 return 743 } 744 a.Reg = r1 745 if r2 != 0 { 746 // TODO: Consistency in the encoding would be nice here. 747 if p.arch.InFamily(sys.ARM, sys.ARM64) { 748 // Special form 749 // ARM: destination register pair (R1, R2). 750 // ARM64: register pair (R1, R2) for LDP/STP. 751 if prefix != 0 || scale != 0 { 752 p.errorf("illegal address mode for register pair") 753 return 754 } 755 a.Type = obj.TYPE_REGREG 756 a.Offset = int64(r2) 757 // Nothing may follow 758 return 759 } 760 if p.arch.Family == sys.PPC64 { 761 // Special form for PPC64: (R1+R2); alias for (R1)(R2*1). 762 if prefix != 0 || scale != 0 { 763 p.errorf("illegal address mode for register+register") 764 return 765 } 766 a.Type = obj.TYPE_MEM 767 a.Scale = 1 768 a.Index = r2 769 // Nothing may follow. 770 return 771 } 772 } 773 if r2 != 0 { 774 p.errorf("indirect through register pair") 775 } 776 if prefix == '$' { 777 a.Type = obj.TYPE_ADDR 778 } 779 if r1 == arch.RPC && prefix != 0 { 780 p.errorf("illegal addressing mode for PC") 781 } 782 if scale == 0 && p.peek() == '(' { 783 // General form (R)(R*scale). 784 p.next() 785 tok := p.next() 786 r1, r2, scale, ok = p.register(tok.String(), 0) 787 if !ok { 788 p.errorf("indirect through non-register %s", tok) 789 } 790 if r2 != 0 { 791 p.errorf("unimplemented two-register form") 792 } 793 a.Index = r1 794 if scale == 0 && p.arch.Family == sys.ARM64 { 795 // scale is 1 by default for ARM64 796 a.Scale = 1 797 } else { 798 a.Scale = int16(scale) 799 } 800 p.get(')') 801 } else if scale != 0 { 802 // First (R) was missing, all we have is (R*scale). 803 a.Reg = 0 804 a.Index = r1 805 a.Scale = int16(scale) 806 } 807 } 808 809 // registerList parses an ARM or ARM64 register list expression, a list of 810 // registers in []. There may be comma-separated ranges or individual 811 // registers, as in [R1,R3-R5] or [V1.S4, V2.S4, V3.S4, V4.S4]. 812 // For ARM, only R0 through R15 may appear. 813 // For ARM64, V0 through V31 with arrangement may appear. 814 // The opening bracket has been consumed. 815 func (p *Parser) registerList(a *obj.Addr) { 816 // One range per loop. 817 var maxReg int 818 var bits uint16 819 var arrangement int64 820 switch p.arch.Family { 821 case sys.ARM: 822 maxReg = 16 823 case sys.ARM64: 824 maxReg = 32 825 default: 826 p.errorf("unexpected register list") 827 } 828 firstReg := -1 829 nextReg := -1 830 regCnt := 0 831 ListLoop: 832 for { 833 tok := p.next() 834 switch tok.ScanToken { 835 case ']': 836 break ListLoop 837 case scanner.EOF: 838 p.errorf("missing ']' in register list") 839 return 840 } 841 switch p.arch.Family { 842 case sys.ARM64: 843 // Vn.T 844 name := tok.String() 845 r, ok := p.registerReference(name) 846 if !ok { 847 p.errorf("invalid register: %s", name) 848 } 849 reg := r - p.arch.Register["V0"] 850 p.get('.') 851 tok := p.next() 852 ext := tok.String() 853 curArrangement, err := arch.ARM64RegisterArrangement(reg, name, ext) 854 if err != nil { 855 p.errorf(err.Error()) 856 } 857 if firstReg == -1 { 858 // only record the first register and arrangement 859 firstReg = int(reg) 860 nextReg = firstReg 861 arrangement = curArrangement 862 } else if curArrangement != arrangement { 863 p.errorf("inconsistent arrangement in ARM64 register list") 864 } else if nextReg != int(reg) { 865 p.errorf("incontiguous register in ARM64 register list: %s", name) 866 } 867 regCnt++ 868 nextReg = (nextReg + 1) % 32 869 case sys.ARM: 870 // Parse the upper and lower bounds. 871 lo := p.registerNumber(tok.String()) 872 hi := lo 873 if p.peek() == '-' { 874 p.next() 875 hi = p.registerNumber(p.next().String()) 876 } 877 if hi < lo { 878 lo, hi = hi, lo 879 } 880 // Check there are no duplicates in the register list. 881 for i := 0; lo <= hi && i < maxReg; i++ { 882 if bits&(1<<lo) != 0 { 883 p.errorf("register R%d already in list", lo) 884 } 885 bits |= 1 << lo 886 lo++ 887 } 888 default: 889 p.errorf("unexpected register list") 890 } 891 if p.peek() != ']' { 892 p.get(',') 893 } 894 } 895 a.Type = obj.TYPE_REGLIST 896 switch p.arch.Family { 897 case sys.ARM: 898 a.Offset = int64(bits) 899 case sys.ARM64: 900 offset, err := arch.ARM64RegisterListOffset(firstReg, regCnt, arrangement) 901 if err != nil { 902 p.errorf(err.Error()) 903 } 904 a.Offset = offset 905 default: 906 p.errorf("register list not supported on this architecuture") 907 } 908 } 909 910 // register number is ARM-specific. It returns the number of the specified register. 911 func (p *Parser) registerNumber(name string) uint16 { 912 if p.arch.Family == sys.ARM && name == "g" { 913 return 10 914 } 915 if name[0] != 'R' { 916 p.errorf("expected g or R0 through R15; found %s", name) 917 return 0 918 } 919 r, ok := p.registerReference(name) 920 if !ok { 921 return 0 922 } 923 reg := r - p.arch.Register["R0"] 924 if reg < 0 { 925 // Could happen for an architecture having other registers prefixed by R 926 p.errorf("expected g or R0 through R15; found %s", name) 927 return 0 928 } 929 return uint16(reg) 930 } 931 932 // Note: There are two changes in the expression handling here 933 // compared to the old yacc/C implementations. Neither has 934 // much practical consequence because the expressions we 935 // see in assembly code are simple, but for the record: 936 // 937 // 1) Evaluation uses uint64; the old one used int64. 938 // 2) Precedence uses Go rules not C rules. 939 940 // expr = term | term ('+' | '-' | '|' | '^') term. 941 func (p *Parser) expr() uint64 { 942 value := p.term() 943 for { 944 switch p.peek() { 945 case '+': 946 p.next() 947 value += p.term() 948 case '-': 949 p.next() 950 value -= p.term() 951 case '|': 952 p.next() 953 value |= p.term() 954 case '^': 955 p.next() 956 value ^= p.term() 957 default: 958 return value 959 } 960 } 961 } 962 963 // floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')' 964 func (p *Parser) floatExpr() float64 { 965 tok := p.next() 966 switch tok.ScanToken { 967 case '(': 968 v := p.floatExpr() 969 if p.next().ScanToken != ')' { 970 p.errorf("missing closing paren") 971 } 972 return v 973 case '+': 974 return +p.floatExpr() 975 case '-': 976 return -p.floatExpr() 977 case scanner.Float: 978 return p.atof(tok.String()) 979 } 980 p.errorf("unexpected %s evaluating float expression", tok) 981 return 0 982 } 983 984 // term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor 985 func (p *Parser) term() uint64 { 986 value := p.factor() 987 for { 988 switch p.peek() { 989 case '*': 990 p.next() 991 value *= p.factor() 992 case '/': 993 p.next() 994 if int64(value) < 0 { 995 p.errorf("divide of value with high bit set") 996 } 997 divisor := p.factor() 998 if divisor == 0 { 999 p.errorf("division by zero") 1000 } else { 1001 value /= divisor 1002 } 1003 case '%': 1004 p.next() 1005 divisor := p.factor() 1006 if int64(value) < 0 { 1007 p.errorf("modulo of value with high bit set") 1008 } 1009 if divisor == 0 { 1010 p.errorf("modulo by zero") 1011 } else { 1012 value %= divisor 1013 } 1014 case lex.LSH: 1015 p.next() 1016 shift := p.factor() 1017 if int64(shift) < 0 { 1018 p.errorf("negative left shift count") 1019 } 1020 return value << shift 1021 case lex.RSH: 1022 p.next() 1023 shift := p.term() 1024 if int64(shift) < 0 { 1025 p.errorf("negative right shift count") 1026 } 1027 if int64(value) < 0 { 1028 p.errorf("right shift of value with high bit set") 1029 } 1030 value >>= shift 1031 case '&': 1032 p.next() 1033 value &= p.factor() 1034 default: 1035 return value 1036 } 1037 } 1038 } 1039 1040 // factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')' 1041 func (p *Parser) factor() uint64 { 1042 tok := p.next() 1043 switch tok.ScanToken { 1044 case scanner.Int: 1045 return p.atoi(tok.String()) 1046 case scanner.Char: 1047 str, err := strconv.Unquote(tok.String()) 1048 if err != nil { 1049 p.errorf("%s", err) 1050 } 1051 r, w := utf8.DecodeRuneInString(str) 1052 if w == 1 && r == utf8.RuneError { 1053 p.errorf("illegal UTF-8 encoding for character constant") 1054 } 1055 return uint64(r) 1056 case '+': 1057 return +p.factor() 1058 case '-': 1059 return -p.factor() 1060 case '~': 1061 return ^p.factor() 1062 case '(': 1063 v := p.expr() 1064 if p.next().ScanToken != ')' { 1065 p.errorf("missing closing paren") 1066 } 1067 return v 1068 } 1069 p.errorf("unexpected %s evaluating expression", tok) 1070 return 0 1071 } 1072 1073 // positiveAtoi returns an int64 that must be >= 0. 1074 func (p *Parser) positiveAtoi(str string) int64 { 1075 value, err := strconv.ParseInt(str, 0, 64) 1076 if err != nil { 1077 p.errorf("%s", err) 1078 } 1079 if value < 0 { 1080 p.errorf("%s overflows int64", str) 1081 } 1082 return value 1083 } 1084 1085 func (p *Parser) atoi(str string) uint64 { 1086 value, err := strconv.ParseUint(str, 0, 64) 1087 if err != nil { 1088 p.errorf("%s", err) 1089 } 1090 return value 1091 } 1092 1093 func (p *Parser) atof(str string) float64 { 1094 value, err := strconv.ParseFloat(str, 64) 1095 if err != nil { 1096 p.errorf("%s", err) 1097 } 1098 return value 1099 } 1100 1101 // EOF represents the end of input. 1102 var EOF = lex.Make(scanner.EOF, "EOF") 1103 1104 func (p *Parser) next() lex.Token { 1105 if !p.more() { 1106 return EOF 1107 } 1108 tok := p.input[p.inputPos] 1109 p.inputPos++ 1110 return tok 1111 } 1112 1113 func (p *Parser) back() { 1114 if p.inputPos == 0 { 1115 p.errorf("internal error: backing up before BOL") 1116 } else { 1117 p.inputPos-- 1118 } 1119 } 1120 1121 func (p *Parser) peek() lex.ScanToken { 1122 if p.more() { 1123 return p.input[p.inputPos].ScanToken 1124 } 1125 return scanner.EOF 1126 } 1127 1128 func (p *Parser) more() bool { 1129 return p.inputPos < len(p.input) 1130 } 1131 1132 // get verifies that the next item has the expected type and returns it. 1133 func (p *Parser) get(expected lex.ScanToken) lex.Token { 1134 p.expect(expected, expected.String()) 1135 return p.next() 1136 } 1137 1138 // expectOperandEnd verifies that the parsing state is properly at the end of an operand. 1139 func (p *Parser) expectOperandEnd() { 1140 p.expect(scanner.EOF, "end of operand") 1141 } 1142 1143 // expect verifies that the next item has the expected type. It does not consume it. 1144 func (p *Parser) expect(expectedToken lex.ScanToken, expectedMessage string) { 1145 if p.peek() != expectedToken { 1146 p.errorf("expected %s, found %s", expectedMessage, p.next()) 1147 } 1148 } 1149 1150 // have reports whether the remaining tokens (including the current one) contain the specified token. 1151 func (p *Parser) have(token lex.ScanToken) bool { 1152 for i := p.inputPos; i < len(p.input); i++ { 1153 if p.input[i].ScanToken == token { 1154 return true 1155 } 1156 } 1157 return false 1158 } 1159 1160 // at reports whether the next tokens are as requested. 1161 func (p *Parser) at(next ...lex.ScanToken) bool { 1162 if len(p.input)-p.inputPos < len(next) { 1163 return false 1164 } 1165 for i, r := range next { 1166 if p.input[p.inputPos+i].ScanToken != r { 1167 return false 1168 } 1169 } 1170 return true 1171 } 1172