      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package amd64
      6 
      7 import (
      8 	"fmt"
      9 	"math"
     10 
     11 	"cmd/compile/internal/gc"
     12 	"cmd/compile/internal/ssa"
     13 	"cmd/internal/obj"
     14 	"cmd/internal/obj/x86"
     15 )
     16 
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
     18 func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
     19 	flive := b.FlagsLiveAtEnd
     20 	if b.Control != nil && b.Control.Type.IsFlags() {
     21 		flive = true
     22 	}
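	// Scan the block backwards, tracking whether flags are live.
	// Constants materialized while flags are live get a non-nil Aux;
	// ssaGenValue later sets PRESERVEFLAGS on them so the assembler's
	// MOV $0,r -> XOR r,r rewrite doesn't clobber the flags.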
     23 	for i := len(b.Values) - 1; i >= 0; i-- {
     24 		v := b.Values[i]
     25 		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
     26 			// The "mark" is any non-nil Aux value.
     27 			v.Aux = v
     28 		}
     29 		if v.Type.IsFlags() {
     30 			flive = false
     31 		}
     32 		for _, a := range v.Args {
     33 			if a.Type.IsFlags() {
     34 				flive = true
     35 			}
     36 		}
     37 	}
     38 }
     39 
     40 // loadByType returns the load instruction of the given type.
     41 func loadByType(t ssa.Type) obj.As {
     42 	// Avoid partial register write
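	// Widening 1- and 2-byte loads to a 32-bit zero-extending load
	// writes the whole destination register, avoiding a partial-register stall.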
     43 	if !t.IsFloat() && t.Size() <= 2 {
     44 		if t.Size() == 1 {
     45 			return x86.AMOVBLZX
     46 		} else {
     47 			return x86.AMOVWLZX
     48 		}
     49 	}
     50 	// Otherwise, there's no difference between load and store opcodes.
     51 	return storeByType(t)
     52 }
     53 
     54 // storeByType returns the store instruction of the given type.
     55 func storeByType(t ssa.Type) obj.As {
     56 	width := t.Size()
     57 	if t.IsFloat() {
     58 		switch width {
     59 		case 4:
     60 			return x86.AMOVSS
     61 		case 8:
     62 			return x86.AMOVSD
     63 		}
     64 	} else {
     65 		switch width {
     66 		case 1:
     67 			return x86.AMOVB
     68 		case 2:
     69 			return x86.AMOVW
     70 		case 4:
     71 			return x86.AMOVL
     72 		case 8:
     73 			return x86.AMOVQ
     74 		}
     75 	}
     76 	panic("bad store type")
     77 }
     78 
     79 // moveByType returns the reg->reg move instruction of the given type.
     80 func moveByType(t ssa.Type) obj.As {
     81 	if t.IsFloat() {
		// Moving the whole SSE2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with a 1-byte opcode,
		// so use MOVUPS, which has a 2-byte opcode.
     86 		return x86.AMOVUPS
     87 	} else {
     88 		switch t.Size() {
		case 1, 2, 4:
			// Avoids partial register write.
			return x86.AMOVL
     96 		case 8:
     97 			return x86.AMOVQ
     98 		case 16:
     99 			return x86.AMOVUPS // int128s are in SSE registers
    100 		default:
    101 			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
    102 		}
    103 	}
    104 }
    105 
// opregreg emits an instruction for
    107 //     dest := dest(To) op src(From)
    108 // and also returns the created obj.Prog so it
    109 // may be further adjusted (offset, scale, etc).
    110 func opregreg(op obj.As, dest, src int16) *obj.Prog {
    111 	p := gc.Prog(op)
    112 	p.From.Type = obj.TYPE_REG
    113 	p.To.Type = obj.TYPE_REG
    114 	p.To.Reg = dest
    115 	p.From.Reg = src
    116 	return p
    117 }
    118 
// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD.
// See runtime/mkduff.go.
    121 func duffStart(size int64) int64 {
    122 	x, _ := duff(size)
    123 	return x
    124 }
    125 func duffAdj(size int64) int64 {
    126 	_, x := duff(size)
    127 	return x
    128 }
    129 
    130 // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
    131 // required to use the duffzero mechanism for a block of the given size.
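// The dz* constants (bytes cleared per MOV, MOVs per block, encoded instruction
// sizes) are defined elsewhere in this package. For sizes that are not a whole
// number of blocks, the returned offset jumps into the middle of a block so that
// only `steps` MOVUPS instructions execute, and the (negative) pointer adjustment
// pre-biases DI so that those MOVUPS offsets and the trailing ADD still land on
// the right addresses.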
    132 func duff(size int64) (int64, int64) {
    133 	if size < 32 || size > 1024 || size%dzClearStep != 0 {
    134 		panic("bad duffzero size")
    135 	}
    136 	steps := size / dzClearStep
    137 	blocks := steps / dzBlockLen
    138 	steps %= dzBlockLen
    139 	off := dzBlockSize * (dzBlocks - blocks)
    140 	var adj int64
    141 	if steps != 0 {
    142 		off -= dzAddSize
    143 		off -= dzMovSize * steps
    144 		adj -= dzClearStep * (dzBlockLen - steps)
    145 	}
    146 	return off, adj
    147 }
    148 
    149 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
    150 	s.SetLineno(v.Line)
    151 	switch v.Op {
    152 	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
    153 		r := v.Reg()
    154 		r1 := v.Args[0].Reg()
    155 		r2 := v.Args[1].Reg()
    156 		switch {
    157 		case r == r1:
    158 			p := gc.Prog(v.Op.Asm())
    159 			p.From.Type = obj.TYPE_REG
    160 			p.From.Reg = r2
    161 			p.To.Type = obj.TYPE_REG
    162 			p.To.Reg = r
    163 		case r == r2:
    164 			p := gc.Prog(v.Op.Asm())
    165 			p.From.Type = obj.TYPE_REG
    166 			p.From.Reg = r1
    167 			p.To.Type = obj.TYPE_REG
    168 			p.To.Reg = r
    169 		default:
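			// Neither input shares the output register, so use LEA,
			// which gives a three-operand add (r = r1 + r2) without
			// touching either input or the flags.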
    170 			var asm obj.As
    171 			if v.Op == ssa.OpAMD64ADDQ {
    172 				asm = x86.ALEAQ
    173 			} else {
    174 				asm = x86.ALEAL
    175 			}
    176 			p := gc.Prog(asm)
    177 			p.From.Type = obj.TYPE_MEM
    178 			p.From.Reg = r1
    179 			p.From.Scale = 1
    180 			p.From.Index = r2
    181 			p.To.Type = obj.TYPE_REG
    182 			p.To.Reg = r
    183 		}
    184 	// 2-address opcode arithmetic
    185 	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
    186 		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
    187 		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
    188 		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
    189 		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
    190 		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
    191 		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
    192 		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
    193 		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
    194 		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
    195 		ssa.OpAMD64PXOR:
    196 		r := v.Reg()
    197 		if r != v.Args[0].Reg() {
    198 			v.Fatalf("input[0] and output not in same register %s", v.LongString())
    199 		}
    200 		opregreg(v.Op.Asm(), r, v.Args[1].Reg())
    201 
    202 	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
    203 		// Arg[0] (the dividend) is in AX.
    204 		// Arg[1] (the divisor) can be in any other register.
    205 		// Result[0] (the quotient) is in AX.
    206 		// Result[1] (the remainder) is in DX.
    207 		r := v.Args[1].Reg()
    208 
    209 		// Zero extend dividend.
    210 		c := gc.Prog(x86.AXORL)
    211 		c.From.Type = obj.TYPE_REG
    212 		c.From.Reg = x86.REG_DX
    213 		c.To.Type = obj.TYPE_REG
    214 		c.To.Reg = x86.REG_DX
    215 
    216 		// Issue divide.
    217 		p := gc.Prog(v.Op.Asm())
    218 		p.From.Type = obj.TYPE_REG
    219 		p.From.Reg = r
    220 
    221 	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
    222 		// Arg[0] (the dividend) is in AX.
    223 		// Arg[1] (the divisor) can be in any other register.
    224 		// Result[0] (the quotient) is in AX.
    225 		// Result[1] (the remainder) is in DX.
    226 		r := v.Args[1].Reg()
    227 
    228 		// CPU faults upon signed overflow, which occurs when the most
    229 		// negative int is divided by -1. Handle divide by -1 as a special case.
    230 		var c *obj.Prog
    231 		switch v.Op {
    232 		case ssa.OpAMD64DIVQ:
    233 			c = gc.Prog(x86.ACMPQ)
    234 		case ssa.OpAMD64DIVL:
    235 			c = gc.Prog(x86.ACMPL)
    236 		case ssa.OpAMD64DIVW:
    237 			c = gc.Prog(x86.ACMPW)
    238 		}
    239 		c.From.Type = obj.TYPE_REG
    240 		c.From.Reg = r
    241 		c.To.Type = obj.TYPE_CONST
    242 		c.To.Offset = -1
    243 		j1 := gc.Prog(x86.AJEQ)
    244 		j1.To.Type = obj.TYPE_BRANCH
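		// j1's branch target is patched below, once the -1 fixup code (n1) exists.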
    245 
    246 		// Sign extend dividend.
    247 		switch v.Op {
    248 		case ssa.OpAMD64DIVQ:
    249 			gc.Prog(x86.ACQO)
    250 		case ssa.OpAMD64DIVL:
    251 			gc.Prog(x86.ACDQ)
    252 		case ssa.OpAMD64DIVW:
    253 			gc.Prog(x86.ACWD)
    254 		}
    255 
    256 		// Issue divide.
    257 		p := gc.Prog(v.Op.Asm())
    258 		p.From.Type = obj.TYPE_REG
    259 		p.From.Reg = r
    260 
    261 		// Skip over -1 fixup code.
    262 		j2 := gc.Prog(obj.AJMP)
    263 		j2.To.Type = obj.TYPE_BRANCH
    264 
    265 		// Issue -1 fixup code.
    266 		// n / -1 = -n
    267 		n1 := gc.Prog(x86.ANEGQ)
    268 		n1.To.Type = obj.TYPE_REG
    269 		n1.To.Reg = x86.REG_AX
    270 
    271 		// n % -1 == 0
    272 		n2 := gc.Prog(x86.AXORL)
    273 		n2.From.Type = obj.TYPE_REG
    274 		n2.From.Reg = x86.REG_DX
    275 		n2.To.Type = obj.TYPE_REG
    276 		n2.To.Reg = x86.REG_DX
    277 
    278 		// TODO(khr): issue only the -1 fixup code we need.
    279 		// For instance, if only the quotient is used, no point in zeroing the remainder.
    280 
    281 		j1.To.Val = n1
    282 		j2.To.Val = s.Pc()
    283 
    284 	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
    285 		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// The frontend rewrites constant division by 8/16/32-bit integers
		// into HMUL by a constant; SSA rewrites generate the 64-bit versions.
    289 
		// Arg[0] is already in AX, as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
    292 		p := gc.Prog(v.Op.Asm())
    293 		p.From.Type = obj.TYPE_REG
    294 		p.From.Reg = v.Args[1].Reg()
    295 
		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
    298 		if v.Type.Size() == 1 {
    299 			m := gc.Prog(x86.AMOVB)
    300 			m.From.Type = obj.TYPE_REG
    301 			m.From.Reg = x86.REG_AH
    302 			m.To.Type = obj.TYPE_REG
    303 			m.To.Reg = x86.REG_DX
    304 		}
    305 
    306 	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX, as it's the only register we allow.
		// Results: hi in DX, lo in AX.
    309 		p := gc.Prog(v.Op.Asm())
    310 		p.From.Type = obj.TYPE_REG
    311 		p.From.Reg = v.Args[1].Reg()
    312 
    313 	case ssa.OpAMD64DIVQU2:
		// Arg[0] and Arg[1] are already in DX and AX, as they're the only registers we allow.
		// Results: quotient in AX, remainder in DX.
    316 		p := gc.Prog(v.Op.Asm())
    317 		p.From.Type = obj.TYPE_REG
    318 		p.From.Reg = v.Args[2].Reg()
    319 
    320 	case ssa.OpAMD64AVGQU:
		// Compute (x+y)/2 unsigned.
		// Do a 64-bit add; the overflow goes into the carry flag.
		// Shift right once and pull the carry back into the 63rd bit.
    324 		r := v.Reg()
    325 		if r != v.Args[0].Reg() {
    326 			v.Fatalf("input[0] and output not in same register %s", v.LongString())
    327 		}
    328 		p := gc.Prog(x86.AADDQ)
    329 		p.From.Type = obj.TYPE_REG
    330 		p.To.Type = obj.TYPE_REG
    331 		p.To.Reg = r
    332 		p.From.Reg = v.Args[1].Reg()
    333 		p = gc.Prog(x86.ARCRQ)
    334 		p.From.Type = obj.TYPE_CONST
    335 		p.From.Offset = 1
    336 		p.To.Type = obj.TYPE_REG
    337 		p.To.Reg = r
    338 
    339 	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
    340 		r := v.Reg()
    341 		a := v.Args[0].Reg()
    342 		if r == a {
    343 			if v.AuxInt == 1 {
    344 				var asm obj.As
				// The software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
    350 				if v.Op == ssa.OpAMD64ADDQconst {
    351 					asm = x86.AINCQ
    352 				} else {
    353 					asm = x86.AINCL
    354 				}
    355 				p := gc.Prog(asm)
    356 				p.To.Type = obj.TYPE_REG
    357 				p.To.Reg = r
    358 				return
    359 			}
    360 			if v.AuxInt == -1 {
    361 				var asm obj.As
    362 				if v.Op == ssa.OpAMD64ADDQconst {
    363 					asm = x86.ADECQ
    364 				} else {
    365 					asm = x86.ADECL
    366 				}
    367 				p := gc.Prog(asm)
    368 				p.To.Type = obj.TYPE_REG
    369 				p.To.Reg = r
    370 				return
    371 			}
    372 			p := gc.Prog(v.Op.Asm())
    373 			p.From.Type = obj.TYPE_CONST
    374 			p.From.Offset = v.AuxInt
    375 			p.To.Type = obj.TYPE_REG
    376 			p.To.Reg = r
    377 			return
    378 		}
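		// The output register differs from the input, so use LEA, which has a
		// three-operand form (r = a + const) and leaves the flags alone.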
    379 		var asm obj.As
    380 		if v.Op == ssa.OpAMD64ADDQconst {
    381 			asm = x86.ALEAQ
    382 		} else {
    383 			asm = x86.ALEAL
    384 		}
    385 		p := gc.Prog(asm)
    386 		p.From.Type = obj.TYPE_MEM
    387 		p.From.Reg = a
    388 		p.From.Offset = v.AuxInt
    389 		p.To.Type = obj.TYPE_REG
    390 		p.To.Reg = r
    391 
    392 	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
    393 		r := v.Reg()
    394 		if r != v.Args[0].Reg() {
    395 			v.Fatalf("input[0] and output not in same register %s", v.LongString())
    396 		}
    397 		p := gc.Prog(v.Op.Asm())
    398 		p.From.Type = obj.TYPE_REG
    399 		p.From.Reg = v.Args[1].Reg()
    400 		p.To.Type = obj.TYPE_REG
    401 		p.To.Reg = r
    402 
    403 	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
    404 		r := v.Reg()
    405 		if r != v.Args[0].Reg() {
    406 			v.Fatalf("input[0] and output not in same register %s", v.LongString())
    407 		}
    408 		p := gc.Prog(v.Op.Asm())
    409 		p.From.Type = obj.TYPE_CONST
    410 		p.From.Offset = v.AuxInt
    411 		p.To.Type = obj.TYPE_REG
    412 		p.To.Reg = r
    413 		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
    414 		// then we don't need to use resultInArg0 for these ops.
    415 		//p.From3 = new(obj.Addr)
    416 		//p.From3.Type = obj.TYPE_REG
    417 		//p.From3.Reg = v.Args[0].Reg()
    418 
    419 	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
    420 		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
    421 		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
    422 		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
    423 		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
    424 		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
    425 		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
    426 		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
    427 		r := v.Reg()
    428 		if r != v.Args[0].Reg() {
    429 			v.Fatalf("input[0] and output not in same register %s", v.LongString())
    430 		}
    431 		p := gc.Prog(v.Op.Asm())
    432 		p.From.Type = obj.TYPE_CONST
    433 		p.From.Offset = v.AuxInt
    434 		p.To.Type = obj.TYPE_REG
    435 		p.To.Reg = r
    436 	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
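		// SBB r, r subtracts the register from itself along with the carry,
		// leaving 0 if the carry flag was clear and -1 (all ones) if it was set.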
    437 		r := v.Reg()
    438 		p := gc.Prog(v.Op.Asm())
    439 		p.From.Type = obj.TYPE_REG
    440 		p.From.Reg = r
    441 		p.To.Type = obj.TYPE_REG
    442 		p.To.Reg = r
    443 	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
    444 		r := v.Args[0].Reg()
    445 		i := v.Args[1].Reg()
    446 		p := gc.Prog(x86.ALEAQ)
    447 		switch v.Op {
    448 		case ssa.OpAMD64LEAQ1:
    449 			p.From.Scale = 1
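			// SP cannot be encoded as an index register; since the scale
			// is 1, swapping base and index leaves the address unchanged.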
    450 			if i == x86.REG_SP {
    451 				r, i = i, r
    452 			}
    453 		case ssa.OpAMD64LEAQ2:
    454 			p.From.Scale = 2
    455 		case ssa.OpAMD64LEAQ4:
    456 			p.From.Scale = 4
    457 		case ssa.OpAMD64LEAQ8:
    458 			p.From.Scale = 8
    459 		}
    460 		p.From.Type = obj.TYPE_MEM
    461 		p.From.Reg = r
    462 		p.From.Index = i
    463 		gc.AddAux(&p.From, v)
    464 		p.To.Type = obj.TYPE_REG
    465 		p.To.Reg = v.Reg()
    466 	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
    467 		p := gc.Prog(v.Op.Asm())
    468 		p.From.Type = obj.TYPE_MEM
    469 		p.From.Reg = v.Args[0].Reg()
    470 		gc.AddAux(&p.From, v)
    471 		p.To.Type = obj.TYPE_REG
    472 		p.To.Reg = v.Reg()
    473 	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
    474 		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
    475 		opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
    476 	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP;
		// we must account for that right here.
    479 		opregreg(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
    480 	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
    481 		p := gc.Prog(v.Op.Asm())
    482 		p.From.Type = obj.TYPE_REG
    483 		p.From.Reg = v.Args[0].Reg()
    484 		p.To.Type = obj.TYPE_CONST
    485 		p.To.Offset = v.AuxInt
    486 	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
    487 		p := gc.Prog(v.Op.Asm())
    488 		p.From.Type = obj.TYPE_CONST
    489 		p.From.Offset = v.AuxInt
    490 		p.To.Type = obj.TYPE_REG
    491 		p.To.Reg = v.Args[0].Reg()
    492 	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
    493 		x := v.Reg()
    494 		p := gc.Prog(v.Op.Asm())
    495 		p.From.Type = obj.TYPE_CONST
    496 		p.From.Offset = v.AuxInt
    497 		p.To.Type = obj.TYPE_REG
    498 		p.To.Reg = x
    499 		// If flags are live at this instruction, suppress the
    500 		// MOV $0,AX -> XOR AX,AX optimization.
    501 		if v.Aux != nil {
    502 			p.Mark |= x86.PRESERVEFLAGS
    503 		}
    504 	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
    505 		x := v.Reg()
    506 		p := gc.Prog(v.Op.Asm())
    507 		p.From.Type = obj.TYPE_FCONST
    508 		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
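		// AuxInt holds the constant as float64 bits, even for MOVSSconst;
		// the 32-bit case is narrowed downstream.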
    509 		p.To.Type = obj.TYPE_REG
    510 		p.To.Reg = x
    511 	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
    512 		p := gc.Prog(v.Op.Asm())
    513 		p.From.Type = obj.TYPE_MEM
    514 		p.From.Reg = v.Args[0].Reg()
    515 		gc.AddAux(&p.From, v)
    516 		p.To.Type = obj.TYPE_REG
    517 		p.To.Reg = v.Reg()
    518 	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
    519 		p := gc.Prog(v.Op.Asm())
    520 		p.From.Type = obj.TYPE_MEM
    521 		p.From.Reg = v.Args[0].Reg()
    522 		gc.AddAux(&p.From, v)
    523 		p.From.Scale = 8
    524 		p.From.Index = v.Args[1].Reg()
    525 		p.To.Type = obj.TYPE_REG
    526 		p.To.Reg = v.Reg()
    527 	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
    528 		p := gc.Prog(v.Op.Asm())
    529 		p.From.Type = obj.TYPE_MEM
    530 		p.From.Reg = v.Args[0].Reg()
    531 		gc.AddAux(&p.From, v)
    532 		p.From.Scale = 4
    533 		p.From.Index = v.Args[1].Reg()
    534 		p.To.Type = obj.TYPE_REG
    535 		p.To.Reg = v.Reg()
    536 	case ssa.OpAMD64MOVWloadidx2:
    537 		p := gc.Prog(v.Op.Asm())
    538 		p.From.Type = obj.TYPE_MEM
    539 		p.From.Reg = v.Args[0].Reg()
    540 		gc.AddAux(&p.From, v)
    541 		p.From.Scale = 2
    542 		p.From.Index = v.Args[1].Reg()
    543 		p.To.Type = obj.TYPE_REG
    544 		p.To.Reg = v.Reg()
    545 	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
    546 		r := v.Args[0].Reg()
    547 		i := v.Args[1].Reg()
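		// SP cannot be an index register; with scale 1 the operands are
		// interchangeable, so make SP the base instead.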
    548 		if i == x86.REG_SP {
    549 			r, i = i, r
    550 		}
    551 		p := gc.Prog(v.Op.Asm())
    552 		p.From.Type = obj.TYPE_MEM
    553 		p.From.Reg = r
    554 		p.From.Scale = 1
    555 		p.From.Index = i
    556 		gc.AddAux(&p.From, v)
    557 		p.To.Type = obj.TYPE_REG
    558 		p.To.Reg = v.Reg()
    559 	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
    560 		p := gc.Prog(v.Op.Asm())
    561 		p.From.Type = obj.TYPE_REG
    562 		p.From.Reg = v.Args[1].Reg()
    563 		p.To.Type = obj.TYPE_MEM
    564 		p.To.Reg = v.Args[0].Reg()
    565 		gc.AddAux(&p.To, v)
    566 	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
    567 		p := gc.Prog(v.Op.Asm())
    568 		p.From.Type = obj.TYPE_REG
    569 		p.From.Reg = v.Args[2].Reg()
    570 		p.To.Type = obj.TYPE_MEM
    571 		p.To.Reg = v.Args[0].Reg()
    572 		p.To.Scale = 8
    573 		p.To.Index = v.Args[1].Reg()
    574 		gc.AddAux(&p.To, v)
    575 	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
    576 		p := gc.Prog(v.Op.Asm())
    577 		p.From.Type = obj.TYPE_REG
    578 		p.From.Reg = v.Args[2].Reg()
    579 		p.To.Type = obj.TYPE_MEM
    580 		p.To.Reg = v.Args[0].Reg()
    581 		p.To.Scale = 4
    582 		p.To.Index = v.Args[1].Reg()
    583 		gc.AddAux(&p.To, v)
    584 	case ssa.OpAMD64MOVWstoreidx2:
    585 		p := gc.Prog(v.Op.Asm())
    586 		p.From.Type = obj.TYPE_REG
    587 		p.From.Reg = v.Args[2].Reg()
    588 		p.To.Type = obj.TYPE_MEM
    589 		p.To.Reg = v.Args[0].Reg()
    590 		p.To.Scale = 2
    591 		p.To.Index = v.Args[1].Reg()
    592 		gc.AddAux(&p.To, v)
    593 	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
    594 		r := v.Args[0].Reg()
    595 		i := v.Args[1].Reg()
    596 		if i == x86.REG_SP {
    597 			r, i = i, r
    598 		}
    599 		p := gc.Prog(v.Op.Asm())
    600 		p.From.Type = obj.TYPE_REG
    601 		p.From.Reg = v.Args[2].Reg()
    602 		p.To.Type = obj.TYPE_MEM
    603 		p.To.Reg = r
    604 		p.To.Scale = 1
    605 		p.To.Index = i
    606 		gc.AddAux(&p.To, v)
    607 	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
    608 		p := gc.Prog(v.Op.Asm())
    609 		p.From.Type = obj.TYPE_CONST
    610 		sc := v.AuxValAndOff()
    611 		p.From.Offset = sc.Val()
    612 		p.To.Type = obj.TYPE_MEM
    613 		p.To.Reg = v.Args[0].Reg()
    614 		gc.AddAux2(&p.To, v, sc.Off())
    615 	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
    616 		p := gc.Prog(v.Op.Asm())
    617 		p.From.Type = obj.TYPE_CONST
    618 		sc := v.AuxValAndOff()
    619 		p.From.Offset = sc.Val()
    620 		r := v.Args[0].Reg()
    621 		i := v.Args[1].Reg()
    622 		switch v.Op {
    623 		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
    624 			p.To.Scale = 1
    625 			if i == x86.REG_SP {
    626 				r, i = i, r
    627 			}
    628 		case ssa.OpAMD64MOVWstoreconstidx2:
    629 			p.To.Scale = 2
    630 		case ssa.OpAMD64MOVLstoreconstidx4:
    631 			p.To.Scale = 4
    632 		case ssa.OpAMD64MOVQstoreconstidx8:
    633 			p.To.Scale = 8
    634 		}
    635 		p.To.Type = obj.TYPE_MEM
    636 		p.To.Reg = r
    637 		p.To.Index = i
    638 		gc.AddAux2(&p.To, v, sc.Off())
    639 	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
    640 		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
    641 		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
    642 		opregreg(v.Op.Asm(), v.Reg(), v.Args[0].Reg())
    643 	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
    644 		r := v.Reg()
    645 		// Break false dependency on destination register.
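		// CVTSI2Sx only writes the low lane of the destination, so without
		// this XORPS the conversion would depend on the register's old contents.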
    646 		opregreg(x86.AXORPS, r, r)
    647 		opregreg(v.Op.Asm(), r, v.Args[0].Reg())
    648 	case ssa.OpAMD64DUFFZERO:
    649 		off := duffStart(v.AuxInt)
    650 		adj := duffAdj(v.AuxInt)
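		// A nonzero adjustment means we enter duffzero mid-block;
		// pre-bias DI so the block's MOVUPS offsets hit the right addresses.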
    651 		var p *obj.Prog
    652 		if adj != 0 {
    653 			p = gc.Prog(x86.AADDQ)
    654 			p.From.Type = obj.TYPE_CONST
    655 			p.From.Offset = adj
    656 			p.To.Type = obj.TYPE_REG
    657 			p.To.Reg = x86.REG_DI
    658 		}
    659 		p = gc.Prog(obj.ADUFFZERO)
    660 		p.To.Type = obj.TYPE_ADDR
    661 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
    662 		p.To.Offset = off
    663 	case ssa.OpAMD64MOVOconst:
    664 		if v.AuxInt != 0 {
    665 			v.Fatalf("MOVOconst can only do constant=0")
    666 		}
    667 		r := v.Reg()
    668 		opregreg(x86.AXORPS, r, r)
    669 	case ssa.OpAMD64DUFFCOPY:
    670 		p := gc.Prog(obj.ADUFFCOPY)
    671 		p.To.Type = obj.TYPE_ADDR
    672 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
    673 		p.To.Offset = v.AuxInt
    674 
    675 	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
    676 		if v.Type.IsMemory() {
    677 			return
    678 		}
    679 		x := v.Args[0].Reg()
    680 		y := v.Reg()
    681 		if x != y {
    682 			opregreg(moveByType(v.Type), y, x)
    683 		}
    684 	case ssa.OpLoadReg:
    685 		if v.Type.IsFlags() {
    686 			v.Fatalf("load flags not implemented: %v", v.LongString())
    687 			return
    688 		}
    689 		p := gc.Prog(loadByType(v.Type))
    690 		gc.AddrAuto(&p.From, v.Args[0])
    691 		p.To.Type = obj.TYPE_REG
    692 		p.To.Reg = v.Reg()
    693 
    694 	case ssa.OpStoreReg:
    695 		if v.Type.IsFlags() {
    696 			v.Fatalf("store flags not implemented: %v", v.LongString())
    697 			return
    698 		}
    699 		p := gc.Prog(storeByType(v.Type))
    700 		p.From.Type = obj.TYPE_REG
    701 		p.From.Reg = v.Args[0].Reg()
    702 		gc.AddrAuto(&p.To, v)
    703 	case ssa.OpPhi:
    704 		gc.CheckLoweredPhi(v)
    705 	case ssa.OpInitMem:
    706 		// memory arg needs no code
    707 	case ssa.OpArg:
    708 		// input args need no code
    709 	case ssa.OpAMD64LoweredGetClosurePtr:
    710 		// Closure pointer is DX.
    711 		gc.CheckLoweredGetClosurePtr(v)
    712 	case ssa.OpAMD64LoweredGetG:
    713 		r := v.Reg()
    714 		// See the comments in cmd/internal/obj/x86/obj6.go
    715 		// near CanUse1InsnTLS for a detailed explanation of these instructions.
    716 		if x86.CanUse1InsnTLS(gc.Ctxt) {
    717 			// MOVQ (TLS), r
    718 			p := gc.Prog(x86.AMOVQ)
    719 			p.From.Type = obj.TYPE_MEM
    720 			p.From.Reg = x86.REG_TLS
    721 			p.To.Type = obj.TYPE_REG
    722 			p.To.Reg = r
    723 		} else {
    724 			// MOVQ TLS, r
    725 			// MOVQ (r)(TLS*1), r
    726 			p := gc.Prog(x86.AMOVQ)
    727 			p.From.Type = obj.TYPE_REG
    728 			p.From.Reg = x86.REG_TLS
    729 			p.To.Type = obj.TYPE_REG
    730 			p.To.Reg = r
    731 			q := gc.Prog(x86.AMOVQ)
    732 			q.From.Type = obj.TYPE_MEM
    733 			q.From.Reg = r
    734 			q.From.Index = x86.REG_TLS
    735 			q.From.Scale = 1
    736 			q.To.Type = obj.TYPE_REG
    737 			q.To.Reg = r
    738 		}
    739 	case ssa.OpAMD64CALLstatic:
    740 		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
    741 			// Deferred calls will appear to be returning to
    742 			// the CALL deferreturn(SB) that we are about to emit.
    743 			// However, the stack trace code will show the line
    744 			// of the instruction byte before the return PC.
    745 			// To avoid that being an unrelated instruction,
    746 			// insert an actual hardware NOP that will have the right line number.
    747 			// This is different from obj.ANOP, which is a virtual no-op
    748 			// that doesn't make it into the instruction stream.
    749 			ginsnop()
    750 		}
    751 		p := gc.Prog(obj.ACALL)
    752 		p.To.Type = obj.TYPE_MEM
    753 		p.To.Name = obj.NAME_EXTERN
    754 		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
    755 		if gc.Maxarg < v.AuxInt {
    756 			gc.Maxarg = v.AuxInt
    757 		}
    758 	case ssa.OpAMD64CALLclosure:
    759 		p := gc.Prog(obj.ACALL)
    760 		p.To.Type = obj.TYPE_REG
    761 		p.To.Reg = v.Args[0].Reg()
    762 		if gc.Maxarg < v.AuxInt {
    763 			gc.Maxarg = v.AuxInt
    764 		}
    765 	case ssa.OpAMD64CALLdefer:
    766 		p := gc.Prog(obj.ACALL)
    767 		p.To.Type = obj.TYPE_MEM
    768 		p.To.Name = obj.NAME_EXTERN
    769 		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
    770 		if gc.Maxarg < v.AuxInt {
    771 			gc.Maxarg = v.AuxInt
    772 		}
    773 	case ssa.OpAMD64CALLgo:
    774 		p := gc.Prog(obj.ACALL)
    775 		p.To.Type = obj.TYPE_MEM
    776 		p.To.Name = obj.NAME_EXTERN
    777 		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
    778 		if gc.Maxarg < v.AuxInt {
    779 			gc.Maxarg = v.AuxInt
    780 		}
    781 	case ssa.OpAMD64CALLinter:
    782 		p := gc.Prog(obj.ACALL)
    783 		p.To.Type = obj.TYPE_REG
    784 		p.To.Reg = v.Args[0].Reg()
    785 		if gc.Maxarg < v.AuxInt {
    786 			gc.Maxarg = v.AuxInt
    787 		}
    788 	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
    789 		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
    790 		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
    791 		r := v.Reg()
    792 		if r != v.Args[0].Reg() {
    793 			v.Fatalf("input[0] and output not in same register %s", v.LongString())
    794 		}
    795 		p := gc.Prog(v.Op.Asm())
    796 		p.To.Type = obj.TYPE_REG
    797 		p.To.Reg = r
    798 	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL:
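		// BSF produces a tuple (result, flags); the integer result is Reg0.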
    799 		p := gc.Prog(v.Op.Asm())
    800 		p.From.Type = obj.TYPE_REG
    801 		p.From.Reg = v.Args[0].Reg()
    802 		p.To.Type = obj.TYPE_REG
    803 		p.To.Reg = v.Reg0()
    804 	case ssa.OpAMD64SQRTSD:
    805 		p := gc.Prog(v.Op.Asm())
    806 		p.From.Type = obj.TYPE_REG
    807 		p.From.Reg = v.Args[0].Reg()
    808 		p.To.Type = obj.TYPE_REG
    809 		p.To.Reg = v.Reg()
    810 	case ssa.OpSP, ssa.OpSB:
    811 		// nothing to do
    812 	case ssa.OpSelect0, ssa.OpSelect1:
    813 		// nothing to do
    814 	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
    815 		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
    816 		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
    817 		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
    818 		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
    819 		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
    820 		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
    821 		p := gc.Prog(v.Op.Asm())
    822 		p.To.Type = obj.TYPE_REG
    823 		p.To.Reg = v.Reg()
    824 
    825 	case ssa.OpAMD64SETNEF:
    826 		p := gc.Prog(v.Op.Asm())
    827 		p.To.Type = obj.TYPE_REG
    828 		p.To.Reg = v.Reg()
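		// Floating-point != is true when the operands compare unordered (PF set)
		// as well as when they compare not-equal, so OR in the parity bit below.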
    829 		q := gc.Prog(x86.ASETPS)
    830 		q.To.Type = obj.TYPE_REG
    831 		q.To.Reg = x86.REG_AX
		// ORL avoids a partial register write and is smaller than ORQ, which the old compiler used.
    833 		opregreg(x86.AORL, v.Reg(), x86.REG_AX)
    834 
    835 	case ssa.OpAMD64SETEQF:
    836 		p := gc.Prog(v.Op.Asm())
    837 		p.To.Type = obj.TYPE_REG
    838 		p.To.Reg = v.Reg()
    839 		q := gc.Prog(x86.ASETPC)
    840 		q.To.Type = obj.TYPE_REG
    841 		q.To.Reg = x86.REG_AX
		// ANDL avoids a partial register write and is smaller than ANDQ, which the old compiler used.
    843 		opregreg(x86.AANDL, v.Reg(), x86.REG_AX)
    844 
    845 	case ssa.OpAMD64InvertFlags:
    846 		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
    847 	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
    848 		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
    849 	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
    850 		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
    851 	case ssa.OpAMD64REPSTOSQ:
    852 		gc.Prog(x86.AREP)
    853 		gc.Prog(x86.ASTOSQ)
    854 	case ssa.OpAMD64REPMOVSQ:
    855 		gc.Prog(x86.AREP)
    856 		gc.Prog(x86.AMOVSQ)
    857 	case ssa.OpVarDef:
    858 		gc.Gvardef(v.Aux.(*gc.Node))
    859 	case ssa.OpVarKill:
    860 		gc.Gvarkill(v.Aux.(*gc.Node))
    861 	case ssa.OpVarLive:
    862 		gc.Gvarlive(v.Aux.(*gc.Node))
    863 	case ssa.OpKeepAlive:
    864 		gc.KeepAlive(v)
    865 	case ssa.OpAMD64LoweredNilCheck:
    866 		// Issue a load which will fault if the input is nil.
    867 		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
    868 		// Should we use the 3-byte TESTB $0, (reg) instead?  It is larger
    869 		// but it doesn't have false dependency on AX.
    870 		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
    871 		// That trades clobbering flags for clobbering a register.
    872 		p := gc.Prog(x86.ATESTB)
    873 		p.From.Type = obj.TYPE_REG
    874 		p.From.Reg = x86.REG_AX
    875 		p.To.Type = obj.TYPE_MEM
    876 		p.To.Reg = v.Args[0].Reg()
    877 		gc.AddAux(&p.To, v)
    878 		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
    879 			gc.Warnl(v.Line, "generated nil check")
    880 		}
    881 	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
    882 		p := gc.Prog(v.Op.Asm())
    883 		p.From.Type = obj.TYPE_MEM
    884 		p.From.Reg = v.Args[0].Reg()
    885 		gc.AddAux(&p.From, v)
    886 		p.To.Type = obj.TYPE_REG
    887 		p.To.Reg = v.Reg0()
    888 	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
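		// XCHG with a memory operand is implicitly locked, so no LOCK prefix is needed.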
    889 		r := v.Reg0()
    890 		if r != v.Args[0].Reg() {
    891 			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
    892 		}
    893 		p := gc.Prog(v.Op.Asm())
    894 		p.From.Type = obj.TYPE_REG
    895 		p.From.Reg = r
    896 		p.To.Type = obj.TYPE_MEM
    897 		p.To.Reg = v.Args[1].Reg()
    898 		gc.AddAux(&p.To, v)
    899 	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
    900 		r := v.Reg0()
    901 		if r != v.Args[0].Reg() {
    902 			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
    903 		}
    904 		gc.Prog(x86.ALOCK)
    905 		p := gc.Prog(v.Op.Asm())
    906 		p.From.Type = obj.TYPE_REG
    907 		p.From.Reg = r
    908 		p.To.Type = obj.TYPE_MEM
    909 		p.To.Reg = v.Args[1].Reg()
    910 		gc.AddAux(&p.To, v)
    911 	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
    912 		if v.Args[1].Reg() != x86.REG_AX {
    913 			v.Fatalf("input[1] not in AX %s", v.LongString())
    914 		}
    915 		gc.Prog(x86.ALOCK)
    916 		p := gc.Prog(v.Op.Asm())
    917 		p.From.Type = obj.TYPE_REG
    918 		p.From.Reg = v.Args[2].Reg()
    919 		p.To.Type = obj.TYPE_MEM
    920 		p.To.Reg = v.Args[0].Reg()
    921 		gc.AddAux(&p.To, v)
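		// CMPXCHG sets ZF when the swap happened; report that as the boolean result.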
    922 		p = gc.Prog(x86.ASETEQ)
    923 		p.To.Type = obj.TYPE_REG
    924 		p.To.Reg = v.Reg0()
    925 	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
    926 		gc.Prog(x86.ALOCK)
    927 		p := gc.Prog(v.Op.Asm())
    928 		p.From.Type = obj.TYPE_REG
    929 		p.From.Reg = v.Args[1].Reg()
    930 		p.To.Type = obj.TYPE_MEM
    931 		p.To.Reg = v.Args[0].Reg()
    932 		gc.AddAux(&p.To, v)
    933 	default:
    934 		v.Fatalf("genValue not implemented: %s", v.LongString())
    935 	}
    936 }
    937 
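// blockJump gives, for each conditional block kind, the jump to emit and its
// inverse (used when the layout falls through to Succs[0], so we branch to
// Succs[1] on the opposite condition).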
    938 var blockJump = [...]struct {
    939 	asm, invasm obj.As
    940 }{
    941 	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
    942 	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
    943 	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
    944 	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
    945 	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
    946 	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
    947 	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
    948 	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
    949 	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
    950 	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
    951 	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
    952 	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
    953 }
    954 
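// eqfJumps and nefJumps describe the two-jump sequences for floating-point
// equality tests, where the parity flag distinguishes the unordered (NaN)
// case; they are consumed by gc.SSAGenFPJump in ssaGenBlock below.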
    955 var eqfJumps = [2][2]gc.FloatingEQNEJump{
    956 	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
    957 	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
    958 }
    959 var nefJumps = [2][2]gc.FloatingEQNEJump{
    960 	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
    961 	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
    962 }
    963 
    964 func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
    965 	s.SetLineno(b.Line)
    966 
    967 	switch b.Kind {
    968 	case ssa.BlockPlain:
    969 		if b.Succs[0].Block() != next {
    970 			p := gc.Prog(obj.AJMP)
    971 			p.To.Type = obj.TYPE_BRANCH
    972 			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
    973 		}
    974 	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing,
		// 1 if we should jump to the deferreturn call.
    978 		p := gc.Prog(x86.ATESTL)
    979 		p.From.Type = obj.TYPE_REG
    980 		p.From.Reg = x86.REG_AX
    981 		p.To.Type = obj.TYPE_REG
    982 		p.To.Reg = x86.REG_AX
    983 		p = gc.Prog(x86.AJNE)
    984 		p.To.Type = obj.TYPE_BRANCH
    985 		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
    986 		if b.Succs[0].Block() != next {
    987 			p := gc.Prog(obj.AJMP)
    988 			p.To.Type = obj.TYPE_BRANCH
    989 			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
    990 		}
    991 	case ssa.BlockExit:
    992 		gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
    993 	case ssa.BlockRet:
    994 		gc.Prog(obj.ARET)
    995 	case ssa.BlockRetJmp:
    996 		p := gc.Prog(obj.AJMP)
    997 		p.To.Type = obj.TYPE_MEM
    998 		p.To.Name = obj.NAME_EXTERN
    999 		p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))
   1000 
   1001 	case ssa.BlockAMD64EQF:
   1002 		gc.SSAGenFPJump(s, b, next, &eqfJumps)
   1003 
   1004 	case ssa.BlockAMD64NEF:
   1005 		gc.SSAGenFPJump(s, b, next, &nefJumps)
   1006 
   1007 	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
   1008 		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
   1009 		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
   1010 		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
   1011 		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
   1012 		jmp := blockJump[b.Kind]
   1013 		likely := b.Likely
   1014 		var p *obj.Prog
   1015 		switch next {
   1016 		case b.Succs[0].Block():
   1017 			p = gc.Prog(jmp.invasm)
   1018 			likely *= -1
   1019 			p.To.Type = obj.TYPE_BRANCH
   1020 			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
   1021 		case b.Succs[1].Block():
   1022 			p = gc.Prog(jmp.asm)
   1023 			p.To.Type = obj.TYPE_BRANCH
   1024 			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   1025 		default:
   1026 			p = gc.Prog(jmp.asm)
   1027 			p.To.Type = obj.TYPE_BRANCH
   1028 			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
   1029 			q := gc.Prog(obj.AJMP)
   1030 			q.To.Type = obj.TYPE_BRANCH
   1031 			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
   1032 		}
   1033 
   1034 		// liblink reorders the instruction stream as it sees fit.
   1035 		// Pass along what we know so liblink can make use of it.
   1036 		// TODO: Once we've fully switched to SSA,
   1037 		// make liblink leave our output alone.
   1038 		switch likely {
   1039 		case ssa.BranchUnlikely:
   1040 			p.From.Type = obj.TYPE_CONST
   1041 			p.From.Offset = 0
   1042 		case ssa.BranchLikely:
   1043 			p.From.Type = obj.TYPE_CONST
   1044 			p.From.Offset = 1
   1045 		}
   1046 
   1047 	default:
   1048 		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
   1049 	}
   1050 }
   1051