1 // Derived from Inferno utils/6c/reg.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c 3 // 4 // Copyright 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright 1995-1997 C H Forsyth (forsyth (a] terzarima.net) 6 // Portions Copyright 1997-1999 Vita Nuova Limited 7 // Portions Copyright 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright 2004,2006 Bruce Ellis 9 // Portions Copyright 2005-2007 C H Forsyth (forsyth (a] terzarima.net) 10 // Revisions Copyright 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 package gc 32 33 import ( 34 "bytes" 35 "cmd/internal/obj" 36 "fmt" 37 "sort" 38 "strings" 39 ) 40 41 // A Var represents a single variable that may be stored in a register. 
// That variable may itself correspond to a hardware register,
// to represent the use of registers in the unoptimized instruction stream.
type Var struct {
	offset     int64 // offset of this word within its node (from obj.Addr.Offset)
	node       *Node // the Go variable (or register pseudo-variable) this word belongs to
	nextinnode *Var  // next tracked word in the same node; the list head is node.Opt()
	width      int   // width in bytes of this word (from obj.Addr.Width)
	id         int   // index in vars
	name       int8  // address class (obj.NAME_PARAM or obj.NAME_AUTO; see mkvar)
	etype      int8  // type kind of this word (from obj.Addr.Etype)
	addr       int8  // nonzero to disable registerization (address taken, overlap, etc.)
}

// Bits represents a set of Vars, stored as a bit set of var numbers
// (the index in vars, or equivalently v.id).
type Bits struct {
	b [BITS]uint64
}

const (
	BITS = 3          // number of 64-bit words in a Bits set
	NVAR = BITS * 64 // maximum number of variables tracked per function
)

var (
	vars [NVAR]Var // variables under consideration
	nvar int       // number of vars

	regbits uint64 // bits for hardware registers

	zbits   Bits // zero
	externs Bits // global variables
	params  Bits // function parameters and results
	ivar    Bits // function parameters (inputs)
	ovar    Bits // function results (outputs)
	consts  Bits // constant values
	addrs   Bits // variables with address taken
)

// A Reg is a wrapper around a single Prog (one instruction) that holds
// register optimization information while the optimizer runs.
// r->prog is the instruction.
type Reg struct {
	set  Bits // regopt variables written by this instruction.
	use1 Bits // regopt variables read by prog->from.
	use2 Bits // regopt variables read by prog->to.

	// refahead/refbehind are the regopt variables whose current
	// value may be used in the following/preceding instructions
	// up to a CALL (or the value is clobbered).
	refbehind Bits
	refahead  Bits

	// calahead/calbehind are similar, but for variables in
	// instructions that are reachable after hitting at least one
	// CALL.
	calbehind Bits
	calahead  Bits

	// regdiff is computed by synch; paint1/paint3 consult it together
	// with STORE to decide where a registerized value must be stored
	// back to memory.
	regdiff Bits
	// act is scratch state: VARDEF tracking in regopt pass 2.5, then
	// painted-region marks in passes 5 and 6.
	act  Bits
	regu uint64 // register used bitmap
}

// A Rgn represents a single regopt variable over a region of code
// where a register could potentially be dedicated to that variable.
// The code encompassed by a Rgn is defined by the flow graph,
// starting at enter, flood-filling forward while varno is refahead
// and backward while varno is refbehind, and following branches.
// A single variable may be represented by multiple disjoint Rgns and
// each Rgn may choose a different register for that variable.
// Registers are allocated to regions greedily in order of descending
// cost.
type Rgn struct {
	enter *Flow // starting point of the region in the flow graph
	cost  int16 // estimated benefit, accumulated by paint1 into change
	varno int16 // index of the variable (in vars) this region tracks
	regno int16 // register chosen by allreg, or 0 if none
}

// The Plan 9 C compilers used a limit of 600 regions,
// but the yacc-generated parser in y.go has 3100 regions.
// We set MaxRgn large enough to handle that.
// There's not a huge cost to having too many regions:
// the main processing traces the live area for each variable,
// which is limited by the number of variables times the area,
// not the raw region count. If there are many regions, they
// are almost certainly small and easy to trace.
// The only operation that scales with region count is the
// sorting by cost, which uses sort.Sort and is therefore
// guaranteed n log n.
const MaxRgn = 6000

var (
	region  []Rgn // candidate regions collected in regopt pass 5
	nregion int   // number of regions found (may transiently exceed MaxRgn)
)

// rcmp orders regions by descending cost, breaking ties by varno and
// then by the entry point's flow-graph id, so that sort.Sort hands the
// most valuable regions to the allocator first (and deterministically).
type rcmp []Rgn

func (x rcmp) Len() int {
	return len(x)
}

func (x rcmp) Swap(i, j int) {
	x[i], x[j] = x[j], x[i]
}

func (x rcmp) Less(i, j int) bool {
	p1 := &x[i]
	p2 := &x[j]
	if p1.cost != p2.cost {
		return int(p2.cost)-int(p1.cost) < 0
	}
	if p1.varno != p2.varno {
		return int(p2.varno)-int(p1.varno) < 0
	}
	if p1.enter != p2.enter {
		return int(p2.enter.Id-p1.enter.Id) < 0
	}
	return false
}

// setaddrs marks every variable named by bit as having its address
// taken (addr = 2), along with every other tracked word of the same
// node and name class, disabling registerization for all of them.
func setaddrs(bit Bits) {
	var i int
	var n int
	var v *Var
	var node *Node

	for bany(&bit) {
		// convert each bit to a variable
		i = bnum(bit)

		node = vars[i].node
		n = int(vars[i].name)
		biclr(&bit, uint(i))

		// disable all pieces of that variable
		for i = 0; i < nvar; i++ {
			v = &vars[i]
			if v.node == node && int(v.name) == n {
				v.addr = 2
			}
		}
	}
}

// regnodes caches the pseudo-variable nodes created for hardware
// registers, so successive regopt calls reuse them (see regopt pass 0).
var regnodes [64]*Node

// walkvardef floods forward from f along the S1 (fallthrough) chain,
// setting the act bit for every tracked word of n at each instruction,
// stopping at an already-visited node (Active == active), at a VARKILL
// of n, or just after a CALL. It then recurses into the S2 (branch)
// successors of the nodes it visited.
func walkvardef(n *Node, f *Flow, active int) {
	var f1 *Flow
	var bn int
	var v *Var

	for f1 = f; f1 != nil; f1 = f1.S1 {
		if f1.Active == int32(active) {
			break
		}
		f1.Active = int32(active)
		if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
			break
		}
		for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
			bn = v.id
			biset(&(f1.Data.(*Reg)).act, uint(bn))
		}

		if f1.Prog.As == obj.ACALL {
			break
		}
	}

	for f2 := f; f2 != f1; f2 = f2.S1 {
		if f2.S2 != nil {
			walkvardef(n, f2.S2, active)
		}
	}
}

/*
 * add mov b,rn
 * just after r
 */
// addmove inserts a move between variable bn and register rn just
// after r's instruction. With f != 0 the move stores the register into
// the variable's memory; with f == 0 it loads the variable into the
// register.
func addmove(r *Flow, bn int, rn int, f int) {
	p1 := Ctxt.NewProg()
	Clearp(p1)
	p1.Pc = 9999 // sentinel pc; real pcs are reassigned later

	// splice p1 into the instruction list right after p
	p := r.Prog
	p1.Link = p.Link
	p.Link = p1
	p1.Lineno = p.Lineno

	v := &vars[bn]

	// build the memory operand describing the variable
	a := &p1.To
	a.Offset = v.offset
	a.Etype = uint8(v.etype)
	a.Type = obj.TYPE_MEM
	a.Name = v.name
	a.Node = v.node
	a.Sym = Linksym(v.node.Sym)

	/* NOTE(rsc): 9g did
	if(a->etype == TARRAY)
		a->type = TYPE_ADDR;
	else if(a->sym == nil)
		a->type = TYPE_CONST;
	*/
	p1.As = int16(Thearch.Optoas(OAS, Types[uint8(v.etype)]))

	// TODO(rsc): Remove special case here.
	if (Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9') && v.etype == TBOOL {
		p1.As = int16(Thearch.Optoas(OAS, Types[TUINT8]))
	}
	p1.From.Type = obj.TYPE_REG
	p1.From.Reg = int16(rn)
	p1.From.Name = obj.NAME_NONE
	if f == 0 {
		// load: swap operands so the memory operand is the source
		p1.From = *a
		*a = obj.Addr{}
		a.Type = obj.TYPE_REG
		a.Reg = int16(rn)
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("%v ===add=== %v\n", p, p1)
	}
	Ostats.Nspill++
}

// overlap_reg reports whether the byte ranges [o1, o1+w1) and
// [o2, o2+w2) overlap.
func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
	t1 := o1 + int64(w1)
	t2 := o2 + int64(w2)

	if t1 <= o2 || t2 <= o1 {
		return false
	}

	return true
}

// mkvar returns the bit set identifying the variable referenced by
// operand a, allocating a new Var (and linking it into its node's list)
// on first sight. It also records register usage implied by a into f's
// Reg. It returns zbits for operands that cannot be registerized
// (constants, globals, address-taken values, overflow past NVAR, ...);
// for plain register operands it returns the register's own bit.
func mkvar(f *Flow, a *obj.Addr) Bits {
	/*
	 * mark registers used
	 */
	if a.Type == obj.TYPE_NONE {
		return zbits
	}

	r := f.Data.(*Reg)
	r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB

	var n int
	switch a.Type {
	default:
		// Not a memory operand: treat as a plain register reference.
		regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
		if regu == 0 {
			return zbits
		}
		bit := zbits
		bit.b[0] = regu
		return bit

		// TODO(rsc): Remove special case here.
	case obj.TYPE_ADDR:
		var bit Bits
		if Thearch.Thechar == '5' || Thearch.Thechar == '7' || Thearch.Thechar == '9' {
			goto memcase
		}
		// Taking the address: resolve the underlying memory operand,
		// then mark it (and all its pieces) address-taken.
		a.Type = obj.TYPE_MEM
		bit = mkvar(f, a)
		setaddrs(bit)
		a.Type = obj.TYPE_ADDR
		Ostats.Naddr++
		return zbits

	memcase:
		fallthrough

	case obj.TYPE_MEM:
		if r != nil {
			r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
		}

		/* NOTE: 5g did
		if(r->f.prog->scond & (C_PBIT|C_WBIT))
			r->set.b[0] |= RtoB(a->reg);
		*/
		switch a.Name {
		default:
			// Note: This case handles NAME_EXTERN and NAME_STATIC.
			// We treat these as requiring eager writes to memory, due to
			// the possibility of a fault handler looking at them, so there is
			// not much point in registerizing the loads.
			// If we later choose the set of candidate variables from a
			// larger list, these cases could be deprioritized instead of
			// removed entirely.
			return zbits

		case obj.NAME_PARAM,
			obj.NAME_AUTO:
			n = int(a.Name)
		}
	}

	node, _ := a.Node.(*Node)
	if node == nil || node.Op != ONAME || node.Orig == nil {
		return zbits
	}
	node = node.Orig
	if node.Orig != node {
		Fatal("%v: bad node", Ctxt.Dconv(a))
	}
	if node.Sym == nil || node.Sym.Name[0] == '.' {
		return zbits
	}
	et := int(a.Etype)
	o := a.Offset
	w := a.Width
	if w < 0 {
		Fatal("bad width %d for %v", w, Ctxt.Dconv(a))
	}

	// Look for an existing Var for this exact word; while scanning,
	// disable any partially overlapping words (they cannot be
	// registerized independently).
	flag := 0
	var v *Var
	for i := 0; i < nvar; i++ {
		v = &vars[i]
		if v.node == node && int(v.name) == n {
			if v.offset == o {
				if int(v.etype) == et {
					if int64(v.width) == w {
						// TODO(rsc): Remove special case for arm here.
						if flag == 0 || Thearch.Thechar != '5' {
							return blsh(uint(i))
						}
					}
				}
			}

			// if they overlap, disable both
			if overlap_reg(v.offset, v.width, o, int(w)) {
				// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
				v.addr = 1

				flag = 1
			}
		}
	}

	switch et {
	case 0, TFUNC:
		return zbits
	}

	if nvar >= NVAR {
		if Debug['w'] > 1 && node != nil {
			Fatal("variable not optimized: %v", Nconv(node, obj.FmtSharp))
		}
		if Debug['v'] > 0 {
			Warn("variable not optimized: %v", Nconv(node, obj.FmtSharp))
		}

		// If we're not tracking a word in a variable, mark the rest as
		// having its address taken, so that we keep the whole thing
		// live at all calls. otherwise we might optimize away part of
		// a variable but not all of it.
		var v *Var
		for i := 0; i < nvar; i++ {
			v = &vars[i]
			if v.node == node {
				v.addr = 1
			}
		}

		return zbits
	}

	// Allocate a new Var for this word.
	i := nvar
	nvar++
	v = &vars[i]
	v.id = i
	v.offset = o
	v.name = int8(n)
	v.etype = int8(et)
	v.width = int(w)
	v.addr = int8(flag) // funny punning
	v.node = node

	// node->opt is the head of a linked list
	// of Vars within the given Node, so that
	// we can start at a Var and find all the other
	// Vars in the same Go variable.
	v.nextinnode, _ = node.Opt().(*Var)

	node.SetOpt(v)

	bit := blsh(uint(i))
	if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
		for z := 0; z < BITS; z++ {
			externs.b[z] |= bit.b[z]
		}
	}
	if n == obj.NAME_PARAM {
		for z := 0; z < BITS; z++ {
			params.b[z] |= bit.b[z]
		}
	}

	if node.Class == PPARAM {
		for z := 0; z < BITS; z++ {
			ivar.b[z] |= bit.b[z]
		}
	}
	if node.Class == PPARAMOUT {
		for z := 0; z < BITS; z++ {
			ovar.b[z] |= bit.b[z]
		}
	}

	// Treat values with their address taken as live at calls,
	// because the garbage collector's liveness analysis in ../gc/plive.c does.
	// These must be consistent or else we will elide stores and the garbage
	// collector will see uninitialized data.
	// The typical case where our own analysis is out of sync is when the
	// node appears to have its address taken but that code doesn't actually
	// get generated and therefore doesn't show up as an address being
	// taken when we analyze the instruction stream.
	// One instance of this case is when a closure uses the same name as
	// an outer variable for one of its own variables declared with :=.
	// The parser flags the outer variable as possibly shared, and therefore
	// sets addrtaken, even though it ends up not being actually shared.
	// If we were better about _ elision, _ = &x would suffice too.
	// The broader := in a closure problem is mentioned in a comment in
	// closure.c:/^typecheckclosure and dcl.c:/^oldname.
	if node.Addrtaken {
		v.addr = 1
	}

	// Disable registerization for globals, because:
	// (1) we might panic at any time and we want the recovery code
	// to see the latest values (issue 1304).
	// (2) we don't know what pointers might point at them and we want
	// loads via those pointers to see updated values and vice versa (issue 7995).
	//
	// Disable registerization for results if using defer, because the deferred func
	// might recover and return, causing the current values to be used.
	if node.Class == PEXTERN || (Hasdefer != 0 && node.Class == PPARAMOUT) {
		v.addr = 1
	}

	if Debug['R'] != 0 {
		fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, Econv(int(et), 0), o, w, Nconv(node, obj.FmtSharp), Ctxt.Dconv(a), v.addr)
	}
	Ostats.Nvar++

	return bit
}

// change is shared mutable state: a nonzero dirty flag for the
// fixed-point iterations in prop/synch (regopt passes 3 and 4), and the
// accumulated cost/benefit counter for paint1 (pass 5).
var change int

// prop propagates use information backward through the flow graph,
// starting at f and walking the P1 (predecessor) chain, updating each
// instruction's refahead/calahead from the incoming ref/cal sets and
// recording the outgoing sets in refbehind/calbehind. CALL, TEXT, and
// RET instructions adjust the sets to match the liveness bitmaps. When
// the chain stops, prop recurses into the remaining predecessors (P2).
func prop(f *Flow, ref Bits, cal Bits) {
	var f1 *Flow
	var r1 *Reg
	var z int
	var i int
	var v *Var
	var v1 *Var

	for f1 = f; f1 != nil; f1 = f1.P1 {
		r1 = f1.Data.(*Reg)
		for z = 0; z < BITS; z++ {
			// Merge incoming sets into this instruction's "ahead" sets;
			// any growth means we must iterate again (change = 1).
			ref.b[z] |= r1.refahead.b[z]
			if ref.b[z] != r1.refahead.b[z] {
				r1.refahead.b[z] = ref.b[z]
				change = 1
			}

			cal.b[z] |= r1.calahead.b[z]
			if cal.b[z] != r1.calahead.b[z] {
				r1.calahead.b[z] = cal.b[z]
				change = 1
			}
		}

		switch f1.Prog.As {
		case obj.ACALL:
			if Noreturn(f1.Prog) {
				break
			}

			// Mark all input variables (ivar) as used, because that's what the
			// liveness bitmaps say. The liveness bitmaps say that so that a
			// panic will not show stale values in the parameter dump.
			// Mark variables with a recent VARDEF (r1->act) as used,
			// so that the optimizer flushes initializations to memory,
			// so that if a garbage collection happens during this CALL,
			// the collector will see initialized memory. Again this is to
			// match what the liveness bitmaps say.
			for z = 0; z < BITS; z++ {
				cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
				ref.b[z] = 0
			}

			// cal.b is the current approximation of what's live across the call.
			// Every bit in cal.b is a single stack word. For each such word,
			// find all the other tracked stack words in the same Go variable
			// (struct/slice/string/interface) and mark them live too.
			// This is necessary because the liveness analysis for the garbage
			// collector works at variable granularity, not at word granularity.
			// It is fundamental for slice/string/interface: the garbage collector
			// needs the whole value, not just some of the words, in order to
			// interpret the other bits correctly. Specifically, slice needs a consistent
			// ptr and cap, string needs a consistent ptr and len, and interface
			// needs a consistent type word and data word.
			for z = 0; z < BITS; z++ {
				if cal.b[z] == 0 {
					continue
				}
				for i = 0; i < 64; i++ {
					if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
						continue
					}
					v = &vars[z*64+i]
					if v.node.Opt() == nil { // v represents fixed register, not Go variable
						continue
					}

					// v->node->opt is the head of a linked list of Vars
					// corresponding to tracked words from the Go variable v->node.
					// Walk the list and set all the bits.
					// For a large struct this could end up being quadratic:
					// after the first setting, the outer loop (for z, i) would see a 1 bit
					// for all of the remaining words in the struct, and for each such
					// word would go through and turn on all the bits again.
					// To avoid the quadratic behavior, we only turn on the bits if
					// v is the head of the list or if the head's bit is not yet turned on.
					// This will set the bits at most twice, keeping the overall loop linear.
					v1, _ = v.node.Opt().(*Var)

					if v == v1 || !btest(&cal, uint(v1.id)) {
						for ; v1 != nil; v1 = v1.nextinnode {
							biset(&cal, uint(v1.id))
						}
					}
				}
			}

		case obj.ATEXT:
			for z = 0; z < BITS; z++ {
				cal.b[z] = 0
				ref.b[z] = 0
			}

		case obj.ARET:
			for z = 0; z < BITS; z++ {
				cal.b[z] = externs.b[z] | ovar.b[z]
				ref.b[z] = 0
			}
		}

		// Transfer function for one instruction: kill what it sets,
		// add what it uses, and record the "behind" sets.
		for z = 0; z < BITS; z++ {
			ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
			cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
			r1.refbehind.b[z] = ref.b[z]
			r1.calbehind.b[z] = cal.b[z]
		}

		if f1.Active != 0 {
			break
		}
		f1.Active = 1
	}

	// Recurse into the other predecessors of every node we visited.
	var r *Reg
	var f2 *Flow
	for ; f != f1; f = f.P1 {
		r = f.Data.(*Reg)
		for f2 = f.P2; f2 != nil; f2 = f2.P2link {
			prop(f2, r.refbehind, r.calbehind)
		}
	}
}

// synch propagates register/variable synchrony (regdiff) forward along
// the S1 chain starting at f, recursing into S2 branches, until the
// graph is complete. Growth of any regdiff set flags another iteration
// via change.
func synch(f *Flow, dif Bits) {
	var r1 *Reg
	var z int

	for f1 := f; f1 != nil; f1 = f1.S1 {
		r1 = f1.Data.(*Reg)
		for z = 0; z < BITS; z++ {
			dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
			if dif.b[z] != r1.regdiff.b[z] {
				r1.regdiff.b[z] = dif.b[z]
				change = 1
			}
		}

		if f1.Active != 0 {
			break
		}
		f1.Active = 1
		for z = 0; z < BITS; z++ {
			dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
		}
		if f1.S2 != nil {
			synch(f1.S2, dif)
		}
	}
}

// allreg chooses a free register for region r's variable, given b, the
// bitmap of registers already in use over the region. It stores the
// choice in r.regno and returns that register's bit, or returns 0 (with
// r.regno = 0) if no suitable register is free or the region has no
// positive cost.
func allreg(b uint64, r *Rgn) uint64 {
	v := &vars[r.varno]
	r.regno = 0
	switch v.etype {
	default:
		Fatal("unknown etype %d/%v", Bitno(b), Econv(int(v.etype), 0))

	case TINT8,
		TUINT8,
		TINT16,
		TUINT16,
		TINT32,
		TUINT32,
		TINT64,
		TUINT64,
		TINT,
		TUINT,
		TUINTPTR,
		TBOOL,
		TPTR32,
		TPTR64:
		// integer/pointer types: pick from the general registers (^b = free)
		i := Thearch.BtoR(^b)
		if i != 0 && r.cost > 0 {
			r.regno = int16(i)
			return Thearch.RtoB(i)
		}

	case TFLOAT32, TFLOAT64:
		// floating-point types: pick from the float registers
		i := Thearch.BtoF(^b)
		if i != 0 && r.cost > 0 {
			r.regno = int16(i)
			return Thearch.FtoB(i)
		}
	}

	return 0
}

// LOAD returns the bits (in word z) that are referenced ahead of r but
// not behind it — the places where a value would have to be loaded.
func LOAD(r *Reg, z int) uint64 {
	return ^r.refbehind.b[z] & r.refahead.b[z]
}

// STORE returns the bits (in word z) that are call-live ahead of r but
// not behind it — the places where a value would have to be stored.
func STORE(r *Reg, z int) uint64 {
	return ^r.calbehind.b[z] & r.calahead.b[z]
}

// Cost parameters
const (
	CLOAD = 5 // cost of load
	CREF  = 5 // cost of reference if not registerized
	LOOP  = 3 // loop execution count (applied in popt.go)
)

// paint1 walks the region of variable bn containing f (first backward
// to the region's start, then forward and into branches), marking act
// bits and accumulating the registerization benefit into change:
// references save CREF, while required loads and stores cost CLOAD,
// each weighted by loop depth (f.Loop).
func paint1(f *Flow, bn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	var f1 *Flow
	var r1 *Reg
	// Walk backward to the start of the region.
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		change -= CLOAD * int(f.Loop)
	}

	for {
		r.act.b[z] |= bb

		if f.Prog.As != obj.ANOP { // don't give credit for NOPs
			if r.use1.b[z]&bb != 0 {
				change += CREF * int(f.Loop)
			}
			if (r.use2.b[z]|r.set.b[z])&bb != 0 {
				change += CREF * int(f.Loop)
			}
		}

		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			change -= CLOAD * int(f.Loop)
		}

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint1(f1, bn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint1(f1, bn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}

// paint2 re-walks the region of variable bn containing f, clearing the
// act bits set by paint1 and returning regbits ORed with the union of
// regu over the region — the set of registers already in use there.
func paint2(f *Flow, bn int, depth int) uint64 {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	vreg := regbits
	r := f.Data.(*Reg)
	if r.act.b[z]&bb == 0 {
		return vreg
	}
	var r1 *Reg
	var f1 *Flow
	// Walk backward to the start of the region.
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb == 0 {
			break
		}
		f = f1
		r = r1
	}

	for {
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("  paint2 %d %v\n", depth, f.Prog)
		}

		r.act.b[z] &^= bb

		vreg |= r.regu

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					vreg |= paint2(f1, bn, depth+1)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				vreg |= paint2(f1, bn, depth+1)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb == 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}

	return vreg
}

// paint3 rewrites the region of variable bn containing f to keep the
// variable in register rn: it replaces memory operands with the
// register (addreg), inserts a load at the region entry and stores
// where STORE/regdiff require (addmove), re-marks act bits, and records
// rb in each instruction's regu.
func paint3(f *Flow, bn int, rb uint64, rn int) {
	z := bn / 64
	bb := uint64(1 << uint(bn%64))
	r := f.Data.(*Reg)
	if r.act.b[z]&bb != 0 {
		return
	}
	var r1 *Reg
	var f1 *Flow
	// Walk backward to the start of the region.
	for {
		if r.refbehind.b[z]&bb == 0 {
			break
		}
		f1 = f.P1
		if f1 == nil {
			break
		}
		r1 = f1.Data.(*Reg)
		if r1.refahead.b[z]&bb == 0 {
			break
		}
		if r1.act.b[z]&bb != 0 {
			break
		}
		f = f1
		r = r1
	}

	if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
		addmove(f, bn, rn, 0)
	}
	var p *obj.Prog
	for {
		r.act.b[z] |= bb
		p = f.Prog

		if r.use1.b[z]&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.From, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		if (r.use2.b[z]|r.set.b[z])&bb != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf("%v", p)
			}
			addreg(&p.To, rn)
			if Debug['R'] != 0 && Debug['v'] != 0 {
				fmt.Printf(" ===change== %v\n", p)
			}
		}

		if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
			addmove(f, bn, rn, 1)
		}
		r.regu |= rb

		if r.refbehind.b[z]&bb != 0 {
			for f1 = f.P2; f1 != nil; f1 = f1.P2link {
				if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
					paint3(f1, bn, rb, rn)
				}
			}
		}

		if r.refahead.b[z]&bb == 0 {
			break
		}
		f1 = f.S2
		if f1 != nil {
			if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
				paint3(f1, bn, rb, rn)
			}
		}
		f = f.S1
		if f == nil {
			break
		}
		r = f.Data.(*Reg)
		if r.act.b[z]&bb != 0 {
			break
		}
		if r.refbehind.b[z]&bb == 0 {
			break
		}
	}
}

// addreg rewrites operand a in place to be register rn.
func addreg(a *obj.Addr, rn int) {
	a.Sym = nil
	a.Node = nil
	a.Offset = 0
	a.Type = obj.TYPE_REG
	a.Reg = int16(rn)
	a.Name = 0

	Ostats.Ncvtreg++
}

// dumpone prints one flow node for debugging; with isreg != 0 it also
// prints whichever of the node's register-optimization bit sets are
// non-empty.
func dumpone(f *Flow, isreg int) {
	fmt.Printf("%d:%v", f.Loop, f.Prog)
	if isreg != 0 {
		r := f.Data.(*Reg)
		var bit Bits
		for z := 0; z < BITS; z++ {
			bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
		}
		if bany(&bit) {
			fmt.Printf("\t")
			if bany(&r.set) {
				fmt.Printf(" s:%v", &r.set)
			}
			if bany(&r.use1) {
				fmt.Printf(" u1:%v", &r.use1)
			}
			if bany(&r.use2) {
				fmt.Printf(" u2:%v", &r.use2)
			}
			if bany(&r.refbehind) {
				fmt.Printf(" rb:%v ", &r.refbehind)
			}
			if bany(&r.refahead) {
				fmt.Printf(" ra:%v ", &r.refahead)
			}
			if bany(&r.calbehind) {
				fmt.Printf(" cb:%v ", &r.calbehind)
			}
			if bany(&r.calahead) {
				fmt.Printf(" ca:%v ", &r.calahead)
			}
			if bany(&r.regdiff) {
				fmt.Printf(" d:%v ", &r.regdiff)
			}
			if bany(&r.act) {
				fmt.Printf(" a:%v ", &r.act)
			}
		}
	}

	fmt.Printf("\n")
}

// Dumpit prints the flow graph rooted at r0 for debugging, labeled with
// str, including predecessor and non-trivial successor edges.
func Dumpit(str string, r0 *Flow, isreg int) {
	var r1 *Flow

	fmt.Printf("\n%s\n", str)
	for r := r0; r != nil; r = r.Link {
		dumpone(r, isreg)
		r1 = r.P2
		if r1 != nil {
			fmt.Printf("\tpred:")
			for ; r1 != nil; r1 = r1.P2link {
				fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
			}
			if r.P1 != nil {
				fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
			} else {
				fmt.Printf(" (only)")
			}
			fmt.Printf("\n")
		}

		// Print successors if it's not just the next one
		if r.S1 != r.Link || r.S2 != nil {
			fmt.Printf("\tsucc:")
			if r.S1 != nil {
				fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
			}
			if r.S2 != nil {
				fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
			}
			fmt.Printf("\n")
		}
	}
}

// regopt performs register allocation over the instruction list
// starting at firstp: it builds a flow graph, computes variable
// liveness by iterative dataflow (prop/synch), identifies profitable
// regions (paint1), assigns registers to them greedily by cost
// (paint2/allreg/paint3), then runs the peephole optimizer and
// eliminates NOPs.
func regopt(firstp *obj.Prog) {
	mergetemp(firstp)

	/*
	 * control flow is more complicated in generated go code
	 * than in generated c code. define pseudo-variables for
	 * registers, so we have complete register usage information.
	 */
	var nreg int
	regnames := Thearch.Regnames(&nreg)

	nvar = nreg
	for i := 0; i < nreg; i++ {
		vars[i] = Var{}
	}
	for i := 0; i < nreg; i++ {
		if regnodes[i] == nil {
			regnodes[i] = newname(Lookup(regnames[i]))
		}
		vars[i].node = regnodes[i]
	}

	regbits = Thearch.Excludedregs()
	externs = zbits
	params = zbits
	consts = zbits
	addrs = zbits
	ivar = zbits
	ovar = zbits

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	g := Flowstart(firstp, func() interface{} { return new(Reg) })
	if g == nil {
		for i := 0; i < nvar; i++ {
			vars[i].node.SetOpt(nil)
		}
		return
	}

	firstf := g.Start

	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
			continue
		}

		// Avoid making variables for direct-called functions.
		if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
			continue
		}

		// from vs to doesn't matter for registers.
		r := f.Data.(*Reg)
		r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
		r.set.b[0] |= p.Info.Regset

		bit := mkvar(f, &p.From)
		if bany(&bit) {
			if p.Info.Flags&LeftAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&LeftRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use1.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&LeftWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}

		// Compute used register for reg
		if p.Info.Flags&RegRead != 0 {
			r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
		}

		// Currently we never generate three register forms.
		// If we do, this will need to change.
		if p.From3Type() != obj.TYPE_NONE {
			Fatal("regopt not implemented for from3")
		}

		bit = mkvar(f, &p.To)
		if bany(&bit) {
			if p.Info.Flags&RightAddr != 0 {
				setaddrs(bit)
			}
			if p.Info.Flags&RightRead != 0 {
				for z := 0; z < BITS; z++ {
					r.use2.b[z] |= bit.b[z]
				}
			}
			if p.Info.Flags&RightWrite != 0 {
				for z := 0; z < BITS; z++ {
					r.set.b[z] |= bit.b[z]
				}
			}
		}
	}

	// Collect all address-taken variables into addrs.
	for i := 0; i < nvar; i++ {
		v := &vars[i]
		if v.addr != 0 {
			bit := blsh(uint(i))
			for z := 0; z < BITS; z++ {
				addrs.b[z] |= bit.b[z]
			}
		}

		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, Econv(int(v.etype), 0), v.width, v.node, v.offset)
		}
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass1", firstf, 1)
	}

	/*
	 * pass 2
	 * find looping structure
	 */
	flowrpo(g)

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass2", firstf, 1)
	}

	/*
	 * pass 2.5
	 * iterate propagating fat vardef covering forward
	 * r->act records vars with a VARDEF since the last CALL.
	 * (r->act will be reused in pass 5 for something else,
	 * but we'll be done with it by then.)
	 */
	active := 0

	for f := firstf; f != nil; f = f.Link {
		f.Active = 0
		r := f.Data.(*Reg)
		r.act = zbits
	}

	for f := firstf; f != nil; f = f.Link {
		p := f.Prog
		if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
			active++
			walkvardef(p.To.Node.(*Node), f, active)
		}
	}

	/*
	 * pass 3
	 * iterate propagating usage
	 * back until flow graph is complete
	 */
	var f1 *Flow
	var i int
	var f *Flow
loop1:
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	for f = firstf; f != nil; f = f.Link {
		if f.Prog.As == obj.ARET {
			prop(f, zbits, zbits)
		}
	}

	/* pick up unreachable code */
loop11:
	i = 0

	for f = firstf; f != nil; f = f1 {
		f1 = f.Link
		if f1 != nil && f1.Active != 0 && f.Active == 0 {
			prop(f, zbits, zbits)
			i = 1
		}
	}

	if i != 0 {
		goto loop11
	}
	if change != 0 {
		goto loop1
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass3", firstf, 1)
	}

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * forward until graph is complete
	 */
loop2:
	change = 0

	for f = firstf; f != nil; f = f.Link {
		f.Active = 0
	}
	synch(firstf, zbits)
	if change != 0 {
		goto loop2
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4", firstf, 1)
	}

	/*
	 * pass 4.5
	 * move register pseudo-variables into regu.
	 */
	mask := uint64((1 << uint(nreg)) - 1)
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
		r.set.b[0] &^= mask
		r.use1.b[0] &^= mask
		r.use2.b[0] &^= mask
		r.refbehind.b[0] &^= mask
		r.refahead.b[0] &^= mask
		r.calbehind.b[0] &^= mask
		r.calahead.b[0] &^= mask
		r.regdiff.b[0] &^= mask
		r.act.b[0] &^= mask
	}

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass4.5", firstf, 1)
	}

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	var bit Bits
	if f := firstf; f != nil {
		r := f.Data.(*Reg)
		for z := 0; z < BITS; z++ {
			bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
		}
		if bany(&bit) && f.Refset == 0 {
			// should never happen - all variables are preset
			if Debug['w'] != 0 {
				fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = 1
		}
	}

	for f := firstf; f != nil; f = f.Link {
		(f.Data.(*Reg)).act = zbits
	}
	nregion = 0
	region = region[:0]
	var rgp *Rgn
	for f := firstf; f != nil; f = f.Link {
		r := f.Data.(*Reg)
		for z := 0; z < BITS; z++ {
			bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
		}
		if bany(&bit) && f.Refset == 0 {
			// Dead store: value set here is never referenced again.
			if Debug['w'] != 0 {
				fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
			}
			f.Refset = 1
			Thearch.Excise(f)
		}

		for z := 0; z < BITS; z++ {
			bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
		}
		for bany(&bit) {
			i = bnum(bit)
			change = 0
			paint1(f, i)
			biclr(&bit, uint(i))
			if change <= 0 {
				continue
			}
			if nregion >= MaxRgn {
				nregion++
				continue
			}

			region = append(region, Rgn{
				enter: f,
				cost:  int16(change),
				varno: int16(i),
			})
			nregion++
		}
	}

	if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
		Warn("regions: %d\n", nregion)
	}
	if nregion >= MaxRgn {
		if Debug['v'] != 0 {
			Warn("too many regions: %d\n", nregion)
		}
		nregion = MaxRgn
	}

	sort.Sort(rcmp(region[:nregion]))

	if Debug['R'] != 0 && Debug['v'] != 0 {
		Dumpit("pass5", firstf, 1)
	}

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	if Debug['R'] != 0 && Debug['v'] != 0 {
		fmt.Printf("\nregisterizing\n")
	}
	var usedreg uint64
	var vreg uint64
	for i := 0; i < nregion; i++ {
		rgp = &region[i]
		if Debug['R'] != 0 && Debug['v'] != 0 {
			fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
		}
		bit = blsh(uint(rgp.varno))
		usedreg = paint2(rgp.enter, int(rgp.varno), 0)
		vreg = allreg(usedreg, rgp)
		if rgp.regno != 0 {
			if Debug['R'] != 0 && Debug['v'] != 0 {
				v := &vars[rgp.varno]
				fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, Econv(int(v.etype), 0), obj.Rconv(int(rgp.regno)), usedreg, vreg)
			}

			paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
		}
	}

	/*
	 * free aux structures. peep allocates new ones.
	 */
	for i := 0; i < nvar; i++ {
		vars[i].node.SetOpt(nil)
	}
	Flowend(g)
	firstf = nil

	if Debug['R'] != 0 && Debug['v'] != 0 {
		// Rebuild flow graph, since we inserted instructions
		g := Flowstart(firstp, nil)
		firstf = g.Start
		Dumpit("pass6", firstf, 0)
		Flowend(g)
		firstf = nil
	}

	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if Debug['R'] == 0 || Debug['P'] != 0 {
		Thearch.Peep(firstp)
	}

	/*
	 * eliminate nops
	 */
	for p := firstp; p != nil; p = p.Link {
		for p.Link != nil && p.Link.As == obj.ANOP {
			p.Link = p.Link.Link
		}
		if p.To.Type == obj.TYPE_BRANCH {
			for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
				p.To.Val = p.To.Val.(*obj.Prog).Link
			}
		}
	}

	if Debug['R'] != 0 {
		if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
			fmt.Printf("\nstats\n")
		}

		if Ostats.Ncvtreg != 0 {
			fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
		}
		if Ostats.Nspill != 0 {
			fmt.Printf("\t%4d spill\n", Ostats.Nspill)
		}
		if Ostats.Nreload != 0 {
			fmt.Printf("\t%4d reload\n", Ostats.Nreload)
		}
		if Ostats.Ndelmov != 0 {
			fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
		}
		if Ostats.Nvar != 0 {
			fmt.Printf("\t%4d var\n", Ostats.Nvar)
		}
		if Ostats.Naddr != 0 {
			fmt.Printf("\t%4d addr\n", Ostats.Naddr)
		}

		Ostats = OptStats{}
	}
}

// bany reports whether any bits in a are set.
func bany(a *Bits) bool {
	for _, x := range &a.b { // & to avoid making a copy of a.b
		if x != 0 {
			return true
		}
	}
	return false
}

// bnum reports the lowest index of a 1 bit in a.
1468 func bnum(a Bits) int { 1469 for i, x := range &a.b { // & to avoid making a copy of a.b 1470 if x != 0 { 1471 return 64*i + Bitno(x) 1472 } 1473 } 1474 1475 Fatal("bad in bnum") 1476 return 0 1477 } 1478 1479 // blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n). 1480 func blsh(n uint) Bits { 1481 c := zbits 1482 c.b[n/64] = 1 << (n % 64) 1483 return c 1484 } 1485 1486 // btest reports whether bit n is 1. 1487 func btest(a *Bits, n uint) bool { 1488 return a.b[n/64]&(1<<(n%64)) != 0 1489 } 1490 1491 // biset sets bit n to 1. 1492 func biset(a *Bits, n uint) { 1493 a.b[n/64] |= 1 << (n % 64) 1494 } 1495 1496 // biclr sets bit n to 0. 1497 func biclr(a *Bits, n uint) { 1498 a.b[n/64] &^= (1 << (n % 64)) 1499 } 1500 1501 // Bitno reports the lowest index of a 1 bit in b. 1502 // It calls Fatal if there is no 1 bit. 1503 func Bitno(b uint64) int { 1504 if b == 0 { 1505 Fatal("bad in bitno") 1506 } 1507 n := 0 1508 if b&(1<<32-1) == 0 { 1509 n += 32 1510 b >>= 32 1511 } 1512 if b&(1<<16-1) == 0 { 1513 n += 16 1514 b >>= 16 1515 } 1516 if b&(1<<8-1) == 0 { 1517 n += 8 1518 b >>= 8 1519 } 1520 if b&(1<<4-1) == 0 { 1521 n += 4 1522 b >>= 4 1523 } 1524 if b&(1<<2-1) == 0 { 1525 n += 2 1526 b >>= 2 1527 } 1528 if b&1 == 0 { 1529 n++ 1530 } 1531 return n 1532 } 1533 1534 // String returns a space-separated list of the variables represented by bits. 1535 func (bits Bits) String() string { 1536 // Note: This method takes a value receiver, both for convenience 1537 // and to make it safe to modify the bits as we process them. 1538 // Even so, most prints above use &bits, because then the value 1539 // being stored in the interface{} is a pointer and does not require 1540 // an allocation and copy to create the interface{}. 
1541 var buf bytes.Buffer 1542 sep := "" 1543 for bany(&bits) { 1544 i := bnum(bits) 1545 buf.WriteString(sep) 1546 sep = " " 1547 v := &vars[i] 1548 if v.node == nil || v.node.Sym == nil { 1549 fmt.Fprintf(&buf, "$%d", i) 1550 } else { 1551 fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i) 1552 if v.offset != 0 { 1553 fmt.Fprintf(&buf, "%+d", int64(v.offset)) 1554 } 1555 } 1556 biclr(&bits, uint(i)) 1557 } 1558 return buf.String() 1559 } 1560