1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package asm implements the parser and instruction generator for the assembler. 6 // TODO: Split apart? 7 package asm 8 9 import ( 10 "fmt" 11 "io" 12 "log" 13 "os" 14 "strconv" 15 "text/scanner" 16 "unicode/utf8" 17 18 "cmd/asm/internal/arch" 19 "cmd/asm/internal/flags" 20 "cmd/asm/internal/lex" 21 "cmd/internal/obj" 22 "cmd/internal/sys" 23 ) 24 25 type Parser struct { 26 lex lex.TokenReader 27 lineNum int // Line number in source file. 28 histLineNum int32 // Cumulative line number across source files. 29 errorLine int32 // (Cumulative) line number of last error. 30 errorCount int // Number of errors. 31 pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. 32 input []lex.Token 33 inputPos int 34 pendingLabels []string // Labels to attach to next instruction. 35 labels map[string]*obj.Prog 36 toPatch []Patch 37 addr []obj.Addr 38 arch *arch.Arch 39 ctxt *obj.Link 40 firstProg *obj.Prog 41 lastProg *obj.Prog 42 dataAddr map[string]int64 // Most recent address for DATA for this symbol. 43 isJump bool // Instruction being assembled is a jump. 44 errorWriter io.Writer 45 } 46 47 type Patch struct { 48 prog *obj.Prog 49 label string 50 } 51 52 func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser { 53 return &Parser{ 54 ctxt: ctxt, 55 arch: ar, 56 lex: lexer, 57 labels: make(map[string]*obj.Prog), 58 dataAddr: make(map[string]int64), 59 errorWriter: os.Stderr, 60 } 61 } 62 63 // panicOnError is enable when testing to abort execution on the first error 64 // and turn it into a recoverable panic. 65 var panicOnError bool 66 67 func (p *Parser) errorf(format string, args ...interface{}) { 68 if panicOnError { 69 panic(fmt.Errorf(format, args...)) 70 } 71 if p.histLineNum == p.errorLine { 72 // Only one error per line. 73 return 74 } 75 p.errorLine = p.histLineNum 76 if p.lex != nil { 77 // Put file and line information on head of message. 78 format = "%s:%d: " + format + "\n" 79 args = append([]interface{}{p.lex.File(), p.lineNum}, args...) 80 } 81 fmt.Fprintf(p.errorWriter, format, args...) 82 p.errorCount++ 83 if p.errorCount > 10 && !*flags.AllErrors { 84 log.Fatal("too many errors") 85 } 86 } 87 88 func (p *Parser) Parse() (*obj.Prog, bool) { 89 for p.line() { 90 } 91 if p.errorCount > 0 { 92 return nil, false 93 } 94 p.patch() 95 return p.firstProg, true 96 } 97 98 // WORD [ arg {, arg} ] (';' | '\n') 99 func (p *Parser) line() bool { 100 // Skip newlines. 101 var tok lex.ScanToken 102 for { 103 tok = p.lex.Next() 104 // We save the line number here so error messages from this instruction 105 // are labeled with this line. Otherwise we complain after we've absorbed 106 // the terminating newline and the line numbers are off by one in errors. 107 p.lineNum = p.lex.Line() 108 p.histLineNum = lex.HistLine() 109 switch tok { 110 case '\n', ';': 111 continue 112 case scanner.EOF: 113 return false 114 } 115 break 116 } 117 // First item must be an identifier. 118 if tok != scanner.Ident { 119 p.errorf("expected identifier, found %q", p.lex.Text()) 120 return false // Might as well stop now. 121 } 122 word := p.lex.Text() 123 var cond string 124 operands := make([][]lex.Token, 0, 3) 125 // Zero or more comma-separated operands, one per loop. 126 nesting := 0 127 colon := -1 128 for tok != '\n' && tok != ';' { 129 // Process one operand. 130 items := make([]lex.Token, 0, 3) 131 for { 132 tok = p.lex.Next() 133 if len(operands) == 0 && len(items) == 0 { 134 if p.arch.InFamily(sys.ARM, sys.ARM64) && tok == '.' { 135 // ARM conditionals. 136 tok = p.lex.Next() 137 str := p.lex.Text() 138 if tok != scanner.Ident { 139 p.errorf("ARM condition expected identifier, found %s", str) 140 } 141 cond = cond + "." + str 142 continue 143 } 144 if tok == ':' { 145 // Labels. 146 p.pendingLabels = append(p.pendingLabels, word) 147 return true 148 } 149 } 150 if tok == scanner.EOF { 151 p.errorf("unexpected EOF") 152 return false 153 } 154 // Split operands on comma. Also, the old syntax on x86 for a "register pair" 155 // was AX:DX, for which the new syntax is DX, AX. Note the reordering. 156 if tok == '\n' || tok == ';' || (nesting == 0 && (tok == ',' || tok == ':')) { 157 if tok == ':' { 158 // Remember this location so we can swap the operands below. 159 if colon >= 0 { 160 p.errorf("invalid ':' in operand") 161 return true 162 } 163 colon = len(operands) 164 } 165 break 166 } 167 if tok == '(' || tok == '[' { 168 nesting++ 169 } 170 if tok == ')' || tok == ']' { 171 nesting-- 172 } 173 items = append(items, lex.Make(tok, p.lex.Text())) 174 } 175 if len(items) > 0 { 176 operands = append(operands, items) 177 if colon >= 0 && len(operands) == colon+2 { 178 // AX:DX becomes DX, AX. 179 operands[colon], operands[colon+1] = operands[colon+1], operands[colon] 180 colon = -1 181 } 182 } else if len(operands) > 0 || tok == ',' || colon >= 0 { 183 // Had a separator with nothing after. 184 p.errorf("missing operand") 185 } 186 } 187 if p.pseudo(word, operands) { 188 return true 189 } 190 i, present := p.arch.Instructions[word] 191 if present { 192 p.instruction(i, word, cond, operands) 193 return true 194 } 195 p.errorf("unrecognized instruction %q", word) 196 return true 197 } 198 199 func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) { 200 p.addr = p.addr[0:0] 201 p.isJump = p.arch.IsJump(word) 202 for _, op := range operands { 203 addr := p.address(op) 204 if !p.isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo. 205 p.errorf("illegal use of pseudo-register in %s", word) 206 } 207 p.addr = append(p.addr, addr) 208 } 209 if p.isJump { 210 p.asmJump(op, cond, p.addr) 211 return 212 } 213 p.asmInstruction(op, cond, p.addr) 214 } 215 216 func (p *Parser) pseudo(word string, operands [][]lex.Token) bool { 217 switch word { 218 case "DATA": 219 p.asmData(word, operands) 220 case "FUNCDATA": 221 p.asmFuncData(word, operands) 222 case "GLOBL": 223 p.asmGlobl(word, operands) 224 case "PCDATA": 225 p.asmPCData(word, operands) 226 case "TEXT": 227 p.asmText(word, operands) 228 default: 229 return false 230 } 231 return true 232 } 233 234 func (p *Parser) start(operand []lex.Token) { 235 p.input = operand 236 p.inputPos = 0 237 } 238 239 // address parses the operand into a link address structure. 240 func (p *Parser) address(operand []lex.Token) obj.Addr { 241 p.start(operand) 242 addr := obj.Addr{} 243 p.operand(&addr) 244 return addr 245 } 246 247 // parseScale converts a decimal string into a valid scale factor. 248 func (p *Parser) parseScale(s string) int8 { 249 switch s { 250 case "1", "2", "4", "8": 251 return int8(s[0] - '0') 252 } 253 p.errorf("bad scale: %s", s) 254 return 0 255 } 256 257 // operand parses a general operand and stores the result in *a. 258 func (p *Parser) operand(a *obj.Addr) bool { 259 //fmt.Printf("Operand: %v\n", p.input) 260 if len(p.input) == 0 { 261 p.errorf("empty operand: cannot happen") 262 return false 263 } 264 // General address (with a few exceptions) looks like 265 // $symoffset(SB)(reg)(index*scale) 266 // Exceptions are: 267 // 268 // R1 269 // offset 270 // $offset 271 // Every piece is optional, so we scan left to right and what 272 // we discover tells us where we are. 273 274 // Prefix: $. 275 var prefix rune 276 switch tok := p.peek(); tok { 277 case '$', '*': 278 prefix = rune(tok) 279 p.next() 280 } 281 282 // Symbol: symoffset(SB) 283 tok := p.next() 284 name := tok.String() 285 if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) { 286 // We have a symbol. Parse $symoffset(symkind) 287 p.symbolReference(a, name, prefix) 288 // fmt.Printf("SYM %s\n", obj.Dconv(&emptyProg, 0, a)) 289 if p.peek() == scanner.EOF { 290 return true 291 } 292 } 293 294 // Special register list syntax for arm: [R1,R3-R7] 295 if tok.ScanToken == '[' { 296 if prefix != 0 { 297 p.errorf("illegal use of register list") 298 } 299 p.registerList(a) 300 p.expectOperandEnd() 301 return true 302 } 303 304 // Register: R1 305 if tok.ScanToken == scanner.Ident && p.atStartOfRegister(name) { 306 if p.atRegisterShift() { 307 // ARM shifted register such as R1<<R2 or R1>>2. 308 a.Type = obj.TYPE_SHIFT 309 a.Offset = p.registerShift(tok.String(), prefix) 310 if p.peek() == '(' { 311 // Can only be a literal register here. 312 p.next() 313 tok := p.next() 314 name := tok.String() 315 if !p.atStartOfRegister(name) { 316 p.errorf("expected register; found %s", name) 317 } 318 a.Reg, _ = p.registerReference(name) 319 p.get(')') 320 } 321 } else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok { 322 if scale != 0 { 323 p.errorf("expected simple register reference") 324 } 325 a.Type = obj.TYPE_REG 326 a.Reg = r1 327 if r2 != 0 { 328 // Form is R1:R2. It is on RHS and the second register 329 // needs to go into the LHS. 330 panic("cannot happen (Addr.Reg2)") 331 } 332 } 333 // fmt.Printf("REG %s\n", obj.Dconv(&emptyProg, 0, a)) 334 p.expectOperandEnd() 335 return true 336 } 337 338 // Constant. 339 haveConstant := false 340 switch tok.ScanToken { 341 case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~': 342 haveConstant = true 343 case '(': 344 // Could be parenthesized expression or (R). Must be something, though. 345 tok := p.next() 346 if tok.ScanToken == scanner.EOF { 347 p.errorf("missing right parenthesis") 348 return false 349 } 350 rname := tok.String() 351 p.back() 352 haveConstant = !p.atStartOfRegister(rname) 353 if !haveConstant { 354 p.back() // Put back the '('. 355 } 356 } 357 if haveConstant { 358 p.back() 359 if p.have(scanner.Float) { 360 if prefix != '$' { 361 p.errorf("floating-point constant must be an immediate") 362 } 363 a.Type = obj.TYPE_FCONST 364 a.Val = p.floatExpr() 365 // fmt.Printf("FCONST %s\n", obj.Dconv(&emptyProg, 0, a)) 366 p.expectOperandEnd() 367 return true 368 } 369 if p.have(scanner.String) { 370 if prefix != '$' { 371 p.errorf("string constant must be an immediate") 372 return false 373 } 374 str, err := strconv.Unquote(p.get(scanner.String).String()) 375 if err != nil { 376 p.errorf("string parse error: %s", err) 377 } 378 a.Type = obj.TYPE_SCONST 379 a.Val = str 380 // fmt.Printf("SCONST %s\n", obj.Dconv(&emptyProg, 0, a)) 381 p.expectOperandEnd() 382 return true 383 } 384 a.Offset = int64(p.expr()) 385 if p.peek() != '(' { 386 switch prefix { 387 case '$': 388 a.Type = obj.TYPE_CONST 389 case '*': 390 a.Type = obj.TYPE_INDIR // Can appear but is illegal, will be rejected by the linker. 391 default: 392 a.Type = obj.TYPE_MEM 393 } 394 // fmt.Printf("CONST %d %s\n", a.Offset, obj.Dconv(&emptyProg, 0, a)) 395 p.expectOperandEnd() 396 return true 397 } 398 // fmt.Printf("offset %d \n", a.Offset) 399 } 400 401 // Register indirection: (reg) or (index*scale). We are on the opening paren. 402 p.registerIndirect(a, prefix) 403 // fmt.Printf("DONE %s\n", p.arch.Dconv(&emptyProg, 0, a)) 404 405 p.expectOperandEnd() 406 return true 407 } 408 409 // atStartOfRegister reports whether the parser is at the start of a register definition. 410 func (p *Parser) atStartOfRegister(name string) bool { 411 // Simple register: R10. 412 _, present := p.arch.Register[name] 413 if present { 414 return true 415 } 416 // Parenthesized register: R(10). 417 return p.arch.RegisterPrefix[name] && p.peek() == '(' 418 } 419 420 // atRegisterShift reports whether we are at the start of an ARM shifted register. 421 // We have consumed the register or R prefix. 422 func (p *Parser) atRegisterShift() bool { 423 // ARM only. 424 if p.arch.Family != sys.ARM { 425 return false 426 } 427 // R1<<... 428 if lex.IsRegisterShift(p.peek()) { 429 return true 430 } 431 // R(1)<<... Ugly check. TODO: Rethink how we handle ARM register shifts to be 432 // less special. 433 if p.peek() != '(' || len(p.input)-p.inputPos < 4 { 434 return false 435 } 436 return p.at('(', scanner.Int, ')') && lex.IsRegisterShift(p.input[p.inputPos+3].ScanToken) 437 } 438 439 // registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10). 440 func (p *Parser) registerReference(name string) (int16, bool) { 441 r, present := p.arch.Register[name] 442 if present { 443 return r, true 444 } 445 if !p.arch.RegisterPrefix[name] { 446 p.errorf("expected register; found %s", name) 447 return 0, false 448 } 449 p.get('(') 450 tok := p.get(scanner.Int) 451 num, err := strconv.ParseInt(tok.String(), 10, 16) 452 p.get(')') 453 if err != nil { 454 p.errorf("parsing register list: %s", err) 455 return 0, false 456 } 457 r, ok := p.arch.RegisterNumber(name, int16(num)) 458 if !ok { 459 p.errorf("illegal register %s(%d)", name, r) 460 return 0, false 461 } 462 return r, true 463 } 464 465 // register parses a full register reference where there is no symbol present (as in 4(R0) or R(10) but not sym(SB)) 466 // including forms involving multiple registers such as R1:R2. 467 func (p *Parser) register(name string, prefix rune) (r1, r2 int16, scale int8, ok bool) { 468 // R1 or R(1) R1:R2 R1,R2 R1+R2, or R1*scale. 469 r1, ok = p.registerReference(name) 470 if !ok { 471 return 472 } 473 if prefix != 0 && prefix != '*' { // *AX is OK. 474 p.errorf("prefix %c not allowed for register: %c%s", prefix, prefix, name) 475 } 476 c := p.peek() 477 if c == ':' || c == ',' || c == '+' { 478 // 2nd register; syntax (R1+R2) etc. No two architectures agree. 479 // Check the architectures match the syntax. 480 switch p.next().ScanToken { 481 case ',': 482 if !p.arch.InFamily(sys.ARM, sys.ARM64) { 483 p.errorf("(register,register) not supported on this architecture") 484 return 485 } 486 case '+': 487 if p.arch.Family != sys.PPC64 { 488 p.errorf("(register+register) not supported on this architecture") 489 return 490 } 491 } 492 name := p.next().String() 493 r2, ok = p.registerReference(name) 494 if !ok { 495 return 496 } 497 } 498 if p.peek() == '*' { 499 // Scale 500 p.next() 501 scale = p.parseScale(p.next().String()) 502 } 503 return r1, r2, scale, true 504 } 505 506 // registerShift parses an ARM shifted register reference and returns the encoded representation. 507 // There is known to be a register (current token) and a shift operator (peeked token). 508 func (p *Parser) registerShift(name string, prefix rune) int64 { 509 if prefix != 0 { 510 p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name) 511 } 512 // R1 op R2 or r1 op constant. 513 // op is: 514 // "<<" == 0 515 // ">>" == 1 516 // "->" == 2 517 // "@>" == 3 518 r1, ok := p.registerReference(name) 519 if !ok { 520 return 0 521 } 522 var op int16 523 switch p.next().ScanToken { 524 case lex.LSH: 525 op = 0 526 case lex.RSH: 527 op = 1 528 case lex.ARR: 529 op = 2 530 case lex.ROT: 531 op = 3 532 } 533 tok := p.next() 534 str := tok.String() 535 var count int16 536 switch tok.ScanToken { 537 case scanner.Ident: 538 r2, ok := p.registerReference(str) 539 if !ok { 540 p.errorf("rhs of shift must be register or integer: %s", str) 541 } 542 count = (r2&15)<<8 | 1<<4 543 case scanner.Int, '(': 544 p.back() 545 x := int64(p.expr()) 546 if x >= 32 { 547 p.errorf("register shift count too large: %s", str) 548 } 549 count = int16((x & 31) << 7) 550 default: 551 p.errorf("unexpected %s in register shift", tok.String()) 552 } 553 return int64((r1 & 15) | op<<5 | count) 554 } 555 556 // symbolReference parses a symbol that is known not to be a register. 557 func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) { 558 // Identifier is a name. 559 switch prefix { 560 case 0: 561 a.Type = obj.TYPE_MEM 562 case '$': 563 a.Type = obj.TYPE_ADDR 564 case '*': 565 a.Type = obj.TYPE_INDIR 566 } 567 // Weirdness with statics: Might now have "<>". 568 isStatic := 0 // TODO: Really a boolean, but Linklookup wants a "version" integer. 569 if p.peek() == '<' { 570 isStatic = 1 571 p.next() 572 p.get('>') 573 } 574 if p.peek() == '+' || p.peek() == '-' { 575 a.Offset = int64(p.expr()) 576 } 577 a.Sym = obj.Linklookup(p.ctxt, name, isStatic) 578 if p.peek() == scanner.EOF { 579 if prefix == 0 && p.isJump { 580 // Symbols without prefix or suffix are jump labels. 581 return 582 } 583 p.errorf("illegal or missing addressing mode for symbol %s", name) 584 return 585 } 586 // Expect (SB), (FP), (PC), or (SP) 587 p.get('(') 588 reg := p.get(scanner.Ident).String() 589 p.get(')') 590 p.setPseudoRegister(a, reg, isStatic != 0, prefix) 591 } 592 593 // setPseudoRegister sets the NAME field of addr for a pseudo-register reference such as (SB). 594 func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, prefix rune) { 595 if addr.Reg != 0 { 596 p.errorf("internal error: reg %s already set in pseudo", reg) 597 } 598 switch reg { 599 case "FP": 600 addr.Name = obj.NAME_PARAM 601 case "PC": 602 if prefix != 0 { 603 p.errorf("illegal addressing mode for PC") 604 } 605 addr.Type = obj.TYPE_BRANCH // We set the type and leave NAME untouched. See asmJump. 606 case "SB": 607 addr.Name = obj.NAME_EXTERN 608 if isStatic { 609 addr.Name = obj.NAME_STATIC 610 } 611 case "SP": 612 addr.Name = obj.NAME_AUTO // The pseudo-stack. 613 default: 614 p.errorf("expected pseudo-register; found %s", reg) 615 } 616 if prefix == '$' { 617 addr.Type = obj.TYPE_ADDR 618 } 619 } 620 621 // registerIndirect parses the general form of a register indirection. 622 // It is can be (R1), (R2*scale), or (R1)(R2*scale) where R1 may be a simple 623 // register or register pair R:R or (R, R) or (R+R). 624 // Or it might be a pseudo-indirection like (FP). 625 // We are sitting on the opening parenthesis. 626 func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) { 627 p.get('(') 628 tok := p.next() 629 name := tok.String() 630 r1, r2, scale, ok := p.register(name, 0) 631 if !ok { 632 p.errorf("indirect through non-register %s", tok) 633 } 634 p.get(')') 635 a.Type = obj.TYPE_MEM 636 if r1 < 0 { 637 // Pseudo-register reference. 638 if r2 != 0 { 639 p.errorf("cannot use pseudo-register in pair") 640 return 641 } 642 // For SB, SP, and FP, there must be a name here. 0(FP) is not legal. 643 if name != "PC" && a.Name == obj.NAME_NONE { 644 p.errorf("cannot reference %s without a symbol", name) 645 } 646 p.setPseudoRegister(a, name, false, prefix) 647 return 648 } 649 a.Reg = r1 650 if r2 != 0 { 651 // TODO: Consistency in the encoding would be nice here. 652 if p.arch.InFamily(sys.ARM, sys.ARM64) { 653 // Special form 654 // ARM: destination register pair (R1, R2). 655 // ARM64: register pair (R1, R2) for LDP/STP. 656 if prefix != 0 || scale != 0 { 657 p.errorf("illegal address mode for register pair") 658 return 659 } 660 a.Type = obj.TYPE_REGREG 661 a.Offset = int64(r2) 662 // Nothing may follow 663 return 664 } 665 if p.arch.Family == sys.PPC64 { 666 // Special form for PPC64: (R1+R2); alias for (R1)(R2*1). 667 if prefix != 0 || scale != 0 { 668 p.errorf("illegal address mode for register+register") 669 return 670 } 671 a.Type = obj.TYPE_MEM 672 a.Scale = 1 673 a.Index = r2 674 // Nothing may follow. 675 return 676 } 677 } 678 if r2 != 0 { 679 p.errorf("indirect through register pair") 680 } 681 if prefix == '$' { 682 a.Type = obj.TYPE_ADDR 683 } 684 if r1 == arch.RPC && prefix != 0 { 685 p.errorf("illegal addressing mode for PC") 686 } 687 if scale == 0 && p.peek() == '(' { 688 // General form (R)(R*scale). 689 p.next() 690 tok := p.next() 691 r1, r2, scale, ok = p.register(tok.String(), 0) 692 if !ok { 693 p.errorf("indirect through non-register %s", tok) 694 } 695 if r2 != 0 { 696 p.errorf("unimplemented two-register form") 697 } 698 a.Index = r1 699 a.Scale = int16(scale) 700 p.get(')') 701 } else if scale != 0 { 702 // First (R) was missing, all we have is (R*scale). 703 a.Reg = 0 704 a.Index = r1 705 a.Scale = int16(scale) 706 } 707 } 708 709 // registerList parses an ARM register list expression, a list of registers in []. 710 // There may be comma-separated ranges or individual registers, as in 711 // [R1,R3-R5]. Only R0 through R15 may appear. 712 // The opening bracket has been consumed. 713 func (p *Parser) registerList(a *obj.Addr) { 714 // One range per loop. 715 const maxReg = 16 716 var bits uint16 717 ListLoop: 718 for { 719 tok := p.next() 720 switch tok.ScanToken { 721 case ']': 722 break ListLoop 723 case scanner.EOF: 724 p.errorf("missing ']' in register list") 725 return 726 } 727 // Parse the upper and lower bounds. 728 lo := p.registerNumber(tok.String()) 729 hi := lo 730 if p.peek() == '-' { 731 p.next() 732 hi = p.registerNumber(p.next().String()) 733 } 734 if hi < lo { 735 lo, hi = hi, lo 736 } 737 // Check there are no duplicates in the register list. 738 for i := 0; lo <= hi && i < maxReg; i++ { 739 if bits&(1<<lo) != 0 { 740 p.errorf("register R%d already in list", lo) 741 } 742 bits |= 1 << lo 743 lo++ 744 } 745 if p.peek() != ']' { 746 p.get(',') 747 } 748 } 749 a.Type = obj.TYPE_REGLIST 750 a.Offset = int64(bits) 751 } 752 753 // register number is ARM-specific. It returns the number of the specified register. 754 func (p *Parser) registerNumber(name string) uint16 { 755 if p.arch.Family == sys.ARM && name == "g" { 756 return 10 757 } 758 if name[0] != 'R' { 759 p.errorf("expected g or R0 through R15; found %s", name) 760 return 0 761 } 762 r, ok := p.registerReference(name) 763 if !ok { 764 return 0 765 } 766 reg := r - p.arch.Register["R0"] 767 if reg < 0 { 768 // Could happen for an architecture having other registers prefixed by R 769 p.errorf("expected g or R0 through R15; found %s", name) 770 return 0 771 } 772 return uint16(reg) 773 } 774 775 // Note: There are two changes in the expression handling here 776 // compared to the old yacc/C implementations. Neither has 777 // much practical consequence because the expressions we 778 // see in assembly code are simple, but for the record: 779 // 780 // 1) Evaluation uses uint64; the old one used int64. 781 // 2) Precedence uses Go rules not C rules. 782 783 // expr = term | term ('+' | '-' | '|' | '^') term. 784 func (p *Parser) expr() uint64 { 785 value := p.term() 786 for { 787 switch p.peek() { 788 case '+': 789 p.next() 790 value += p.term() 791 case '-': 792 p.next() 793 value -= p.term() 794 case '|': 795 p.next() 796 value |= p.term() 797 case '^': 798 p.next() 799 value ^= p.term() 800 default: 801 return value 802 } 803 } 804 } 805 806 // floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')' 807 func (p *Parser) floatExpr() float64 { 808 tok := p.next() 809 switch tok.ScanToken { 810 case '(': 811 v := p.floatExpr() 812 if p.next().ScanToken != ')' { 813 p.errorf("missing closing paren") 814 } 815 return v 816 case '+': 817 return +p.floatExpr() 818 case '-': 819 return -p.floatExpr() 820 case scanner.Float: 821 return p.atof(tok.String()) 822 } 823 p.errorf("unexpected %s evaluating float expression", tok) 824 return 0 825 } 826 827 // term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor 828 func (p *Parser) term() uint64 { 829 value := p.factor() 830 for { 831 switch p.peek() { 832 case '*': 833 p.next() 834 value *= p.factor() 835 case '/': 836 p.next() 837 if int64(value) < 0 { 838 p.errorf("divide of value with high bit set") 839 } 840 divisor := p.factor() 841 if divisor == 0 { 842 p.errorf("division by zero") 843 } else { 844 value /= divisor 845 } 846 case '%': 847 p.next() 848 divisor := p.factor() 849 if int64(value) < 0 { 850 p.errorf("modulo of value with high bit set") 851 } 852 if divisor == 0 { 853 p.errorf("modulo by zero") 854 } else { 855 value %= divisor 856 } 857 case lex.LSH: 858 p.next() 859 shift := p.factor() 860 if int64(shift) < 0 { 861 p.errorf("negative left shift count") 862 } 863 return value << shift 864 case lex.RSH: 865 p.next() 866 shift := p.term() 867 if int64(shift) < 0 { 868 p.errorf("negative right shift count") 869 } 870 if int64(value) < 0 { 871 p.errorf("right shift of value with high bit set") 872 } 873 value >>= shift 874 case '&': 875 p.next() 876 value &= p.factor() 877 default: 878 return value 879 } 880 } 881 } 882 883 // factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')' 884 func (p *Parser) factor() uint64 { 885 tok := p.next() 886 switch tok.ScanToken { 887 case scanner.Int: 888 return p.atoi(tok.String()) 889 case scanner.Char: 890 str, err := strconv.Unquote(tok.String()) 891 if err != nil { 892 p.errorf("%s", err) 893 } 894 r, w := utf8.DecodeRuneInString(str) 895 if w == 1 && r == utf8.RuneError { 896 p.errorf("illegal UTF-8 encoding for character constant") 897 } 898 return uint64(r) 899 case '+': 900 return +p.factor() 901 case '-': 902 return -p.factor() 903 case '~': 904 return ^p.factor() 905 case '(': 906 v := p.expr() 907 if p.next().ScanToken != ')' { 908 p.errorf("missing closing paren") 909 } 910 return v 911 } 912 p.errorf("unexpected %s evaluating expression", tok) 913 return 0 914 } 915 916 // positiveAtoi returns an int64 that must be >= 0. 917 func (p *Parser) positiveAtoi(str string) int64 { 918 value, err := strconv.ParseInt(str, 0, 64) 919 if err != nil { 920 p.errorf("%s", err) 921 } 922 if value < 0 { 923 p.errorf("%s overflows int64", str) 924 } 925 return value 926 } 927 928 func (p *Parser) atoi(str string) uint64 { 929 value, err := strconv.ParseUint(str, 0, 64) 930 if err != nil { 931 p.errorf("%s", err) 932 } 933 return value 934 } 935 936 func (p *Parser) atof(str string) float64 { 937 value, err := strconv.ParseFloat(str, 64) 938 if err != nil { 939 p.errorf("%s", err) 940 } 941 return value 942 } 943 944 // EOF represents the end of input. 945 var EOF = lex.Make(scanner.EOF, "EOF") 946 947 func (p *Parser) next() lex.Token { 948 if !p.more() { 949 return EOF 950 } 951 tok := p.input[p.inputPos] 952 p.inputPos++ 953 return tok 954 } 955 956 func (p *Parser) back() { 957 if p.inputPos == 0 { 958 p.errorf("internal error: backing up before BOL") 959 } else { 960 p.inputPos-- 961 } 962 } 963 964 func (p *Parser) peek() lex.ScanToken { 965 if p.more() { 966 return p.input[p.inputPos].ScanToken 967 } 968 return scanner.EOF 969 } 970 971 func (p *Parser) more() bool { 972 return p.inputPos < len(p.input) 973 } 974 975 // get verifies that the next item has the expected type and returns it. 976 func (p *Parser) get(expected lex.ScanToken) lex.Token { 977 p.expect(expected, expected.String()) 978 return p.next() 979 } 980 981 // expectOperandEnd verifies that the parsing state is properly at the end of an operand. 982 func (p *Parser) expectOperandEnd() { 983 p.expect(scanner.EOF, "end of operand") 984 } 985 986 // expect verifies that the next item has the expected type. It does not consume it. 987 func (p *Parser) expect(expectedToken lex.ScanToken, expectedMessage string) { 988 if p.peek() != expectedToken { 989 p.errorf("expected %s, found %s", expectedMessage, p.next()) 990 } 991 } 992 993 // have reports whether the remaining tokens (including the current one) contain the specified token. 994 func (p *Parser) have(token lex.ScanToken) bool { 995 for i := p.inputPos; i < len(p.input); i++ { 996 if p.input[i].ScanToken == token { 997 return true 998 } 999 } 1000 return false 1001 } 1002 1003 // at reports whether the next tokens are as requested. 1004 func (p *Parser) at(next ...lex.ScanToken) bool { 1005 if len(p.input)-p.inputPos < len(next) { 1006 return false 1007 } 1008 for i, r := range next { 1009 if p.input[p.inputPos+i].ScanToken != r { 1010 return false 1011 } 1012 } 1013 return true 1014 } 1015