Home | History | Annotate | Download | only in x86
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package x86
      6 
      7 import (
      8 	"cmd/compile/internal/gc"
      9 	"cmd/compile/internal/ssa"
     10 	"cmd/compile/internal/types"
     11 	"cmd/internal/obj"
     12 	"cmd/internal/obj/x86"
     13 	"math"
     14 )
     15 
     16 // Generates code for v using 387 instructions.
     17 func ssaGenValue387(s *gc.SSAGenState, v *ssa.Value) {
     18 	// The SSA compiler pretends that it has an SSE backend.
     19 	// If we don't have one of those, we need to translate
     20 	// all the SSE ops to equivalent 387 ops. That's what this
     21 	// function does.
     22 
     23 	switch v.Op {
     24 	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
     25 		p := s.Prog(loadPush(v.Type))
     26 		p.From.Type = obj.TYPE_FCONST
     27 		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
     28 		p.To.Type = obj.TYPE_REG
     29 		p.To.Reg = x86.REG_F0
     30 		popAndSave(s, v)
     31 
     32 	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
     33 		p := s.Prog(loadPush(v.Type))
     34 		p.From.Type = obj.TYPE_MEM
     35 		p.From.Reg = v.Args[0].Reg()
     36 		p.To.Type = obj.TYPE_REG
     37 		p.To.Reg = x86.REG_F0
     38 		popAndSave(s, v)
     39 
     40 	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1, ssa.Op386MOVSSloadidx4, ssa.Op386MOVSDloadidx8:
     41 		p := s.Prog(loadPush(v.Type))
     42 		p.From.Type = obj.TYPE_MEM
     43 		p.From.Reg = v.Args[0].Reg()
     44 		gc.AddAux(&p.From, v)
     45 		switch v.Op {
     46 		case ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
     47 			p.From.Scale = 1
     48 			p.From.Index = v.Args[1].Reg()
     49 			if p.From.Index == x86.REG_SP {
     50 				p.From.Reg, p.From.Index = p.From.Index, p.From.Reg
     51 			}
     52 		case ssa.Op386MOVSSloadidx4:
     53 			p.From.Scale = 4
     54 			p.From.Index = v.Args[1].Reg()
     55 		case ssa.Op386MOVSDloadidx8:
     56 			p.From.Scale = 8
     57 			p.From.Index = v.Args[1].Reg()
     58 		}
     59 		p.To.Type = obj.TYPE_REG
     60 		p.To.Reg = x86.REG_F0
     61 		popAndSave(s, v)
     62 
     63 	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore:
     64 		// Push to-be-stored value on top of stack.
     65 		push(s, v.Args[1])
     66 
     67 		// Pop and store value.
     68 		var op obj.As
     69 		switch v.Op {
     70 		case ssa.Op386MOVSSstore:
     71 			op = x86.AFMOVFP
     72 		case ssa.Op386MOVSDstore:
     73 			op = x86.AFMOVDP
     74 		}
     75 		p := s.Prog(op)
     76 		p.From.Type = obj.TYPE_REG
     77 		p.From.Reg = x86.REG_F0
     78 		p.To.Type = obj.TYPE_MEM
     79 		p.To.Reg = v.Args[0].Reg()
     80 		gc.AddAux(&p.To, v)
     81 
     82 	case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVSDstoreidx8:
     83 		push(s, v.Args[2])
     84 		var op obj.As
     85 		switch v.Op {
     86 		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSSstoreidx4:
     87 			op = x86.AFMOVFP
     88 		case ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSDstoreidx8:
     89 			op = x86.AFMOVDP
     90 		}
     91 		p := s.Prog(op)
     92 		p.From.Type = obj.TYPE_REG
     93 		p.From.Reg = x86.REG_F0
     94 		p.To.Type = obj.TYPE_MEM
     95 		p.To.Reg = v.Args[0].Reg()
     96 		gc.AddAux(&p.To, v)
     97 		switch v.Op {
     98 		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
     99 			p.To.Scale = 1
    100 			p.To.Index = v.Args[1].Reg()
    101 			if p.To.Index == x86.REG_SP {
    102 				p.To.Reg, p.To.Index = p.To.Index, p.To.Reg
    103 			}
    104 		case ssa.Op386MOVSSstoreidx4:
    105 			p.To.Scale = 4
    106 			p.To.Index = v.Args[1].Reg()
    107 		case ssa.Op386MOVSDstoreidx8:
    108 			p.To.Scale = 8
    109 			p.To.Index = v.Args[1].Reg()
    110 		}
    111 
    112 	case ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
    113 		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD:
    114 		if v.Reg() != v.Args[0].Reg() {
    115 			v.Fatalf("input[0] and output not in same register %s", v.LongString())
    116 		}
    117 
    118 		// Push arg1 on top of stack
    119 		push(s, v.Args[1])
    120 
    121 		// Set precision if needed.  64 bits is the default.
    122 		switch v.Op {
    123 		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
    124 			p := s.Prog(x86.AFSTCW)
    125 			s.AddrScratch(&p.To)
    126 			p = s.Prog(x86.AFLDCW)
    127 			p.From.Type = obj.TYPE_MEM
    128 			p.From.Name = obj.NAME_EXTERN
    129 			p.From.Sym = gc.ControlWord32
    130 		}
    131 
    132 		var op obj.As
    133 		switch v.Op {
    134 		case ssa.Op386ADDSS, ssa.Op386ADDSD:
    135 			op = x86.AFADDDP
    136 		case ssa.Op386SUBSS, ssa.Op386SUBSD:
    137 			op = x86.AFSUBDP
    138 		case ssa.Op386MULSS, ssa.Op386MULSD:
    139 			op = x86.AFMULDP
    140 		case ssa.Op386DIVSS, ssa.Op386DIVSD:
    141 			op = x86.AFDIVDP
    142 		}
    143 		p := s.Prog(op)
    144 		p.From.Type = obj.TYPE_REG
    145 		p.From.Reg = x86.REG_F0
    146 		p.To.Type = obj.TYPE_REG
    147 		p.To.Reg = s.SSEto387[v.Reg()] + 1
    148 
    149 		// Restore precision if needed.
    150 		switch v.Op {
    151 		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
    152 			p := s.Prog(x86.AFLDCW)
    153 			s.AddrScratch(&p.From)
    154 		}
    155 
    156 	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
    157 		push(s, v.Args[0])
    158 
    159 		// Compare.
    160 		p := s.Prog(x86.AFUCOMP)
    161 		p.From.Type = obj.TYPE_REG
    162 		p.From.Reg = x86.REG_F0
    163 		p.To.Type = obj.TYPE_REG
    164 		p.To.Reg = s.SSEto387[v.Args[1].Reg()] + 1
    165 
    166 		// Save AX.
    167 		p = s.Prog(x86.AMOVL)
    168 		p.From.Type = obj.TYPE_REG
    169 		p.From.Reg = x86.REG_AX
    170 		s.AddrScratch(&p.To)
    171 
    172 		// Move status word into AX.
    173 		p = s.Prog(x86.AFSTSW)
    174 		p.To.Type = obj.TYPE_REG
    175 		p.To.Reg = x86.REG_AX
    176 
    177 		// Then move the flags we need to the integer flags.
    178 		s.Prog(x86.ASAHF)
    179 
    180 		// Restore AX.
    181 		p = s.Prog(x86.AMOVL)
    182 		s.AddrScratch(&p.From)
    183 		p.To.Type = obj.TYPE_REG
    184 		p.To.Reg = x86.REG_AX
    185 
    186 	case ssa.Op386SQRTSD:
    187 		push(s, v.Args[0])
    188 		s.Prog(x86.AFSQRT)
    189 		popAndSave(s, v)
    190 
    191 	case ssa.Op386FCHS:
    192 		push(s, v.Args[0])
    193 		s.Prog(x86.AFCHS)
    194 		popAndSave(s, v)
    195 
    196 	case ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD:
    197 		p := s.Prog(x86.AMOVL)
    198 		p.From.Type = obj.TYPE_REG
    199 		p.From.Reg = v.Args[0].Reg()
    200 		s.AddrScratch(&p.To)
    201 		p = s.Prog(x86.AFMOVL)
    202 		s.AddrScratch(&p.From)
    203 		p.To.Type = obj.TYPE_REG
    204 		p.To.Reg = x86.REG_F0
    205 		popAndSave(s, v)
    206 
    207 	case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL:
    208 		push(s, v.Args[0])
    209 
    210 		// Save control word.
    211 		p := s.Prog(x86.AFSTCW)
    212 		s.AddrScratch(&p.To)
    213 		p.To.Offset += 4
    214 
    215 		// Load control word which truncates (rounds towards zero).
    216 		p = s.Prog(x86.AFLDCW)
    217 		p.From.Type = obj.TYPE_MEM
    218 		p.From.Name = obj.NAME_EXTERN
    219 		p.From.Sym = gc.ControlWord64trunc
    220 
    221 		// Now do the conversion.
    222 		p = s.Prog(x86.AFMOVLP)
    223 		p.From.Type = obj.TYPE_REG
    224 		p.From.Reg = x86.REG_F0
    225 		s.AddrScratch(&p.To)
    226 		p = s.Prog(x86.AMOVL)
    227 		s.AddrScratch(&p.From)
    228 		p.To.Type = obj.TYPE_REG
    229 		p.To.Reg = v.Reg()
    230 
    231 		// Restore control word.
    232 		p = s.Prog(x86.AFLDCW)
    233 		s.AddrScratch(&p.From)
    234 		p.From.Offset += 4
    235 
    236 	case ssa.Op386CVTSS2SD:
    237 		// float32 -> float64 is a nop
    238 		push(s, v.Args[0])
    239 		popAndSave(s, v)
    240 
    241 	case ssa.Op386CVTSD2SS:
    242 		// Round to nearest float32.
    243 		push(s, v.Args[0])
    244 		p := s.Prog(x86.AFMOVFP)
    245 		p.From.Type = obj.TYPE_REG
    246 		p.From.Reg = x86.REG_F0
    247 		s.AddrScratch(&p.To)
    248 		p = s.Prog(x86.AFMOVF)
    249 		s.AddrScratch(&p.From)
    250 		p.To.Type = obj.TYPE_REG
    251 		p.To.Reg = x86.REG_F0
    252 		popAndSave(s, v)
    253 
    254 	case ssa.OpLoadReg:
    255 		if !v.Type.IsFloat() {
    256 			ssaGenValue(s, v)
    257 			return
    258 		}
    259 		// Load+push the value we need.
    260 		p := s.Prog(loadPush(v.Type))
    261 		gc.AddrAuto(&p.From, v.Args[0])
    262 		p.To.Type = obj.TYPE_REG
    263 		p.To.Reg = x86.REG_F0
    264 		// Move the value to its assigned register.
    265 		popAndSave(s, v)
    266 
    267 	case ssa.OpStoreReg:
    268 		if !v.Type.IsFloat() {
    269 			ssaGenValue(s, v)
    270 			return
    271 		}
    272 		push(s, v.Args[0])
    273 		var op obj.As
    274 		switch v.Type.Size() {
    275 		case 4:
    276 			op = x86.AFMOVFP
    277 		case 8:
    278 			op = x86.AFMOVDP
    279 		}
    280 		p := s.Prog(op)
    281 		p.From.Type = obj.TYPE_REG
    282 		p.From.Reg = x86.REG_F0
    283 		gc.AddrAuto(&p.To, v)
    284 
    285 	case ssa.OpCopy:
    286 		if !v.Type.IsFloat() {
    287 			ssaGenValue(s, v)
    288 			return
    289 		}
    290 		push(s, v.Args[0])
    291 		popAndSave(s, v)
    292 
    293 	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
    294 		flush387(s) // Calls must empty the FP stack.
    295 		fallthrough // then issue the call as normal
    296 	default:
    297 		ssaGenValue(s, v)
    298 	}
    299 }
    300 
    301 // push pushes v onto the floating-point stack.  v must be in a register.
    302 func push(s *gc.SSAGenState, v *ssa.Value) {
    303 	p := s.Prog(x86.AFMOVD)
    304 	p.From.Type = obj.TYPE_REG
    305 	p.From.Reg = s.SSEto387[v.Reg()]
    306 	p.To.Type = obj.TYPE_REG
    307 	p.To.Reg = x86.REG_F0
    308 }
    309 
    310 // popAndSave pops a value off of the floating-point stack and stores
    311 // it in the reigster assigned to v.
    312 func popAndSave(s *gc.SSAGenState, v *ssa.Value) {
    313 	r := v.Reg()
    314 	if _, ok := s.SSEto387[r]; ok {
    315 		// Pop value, write to correct register.
    316 		p := s.Prog(x86.AFMOVDP)
    317 		p.From.Type = obj.TYPE_REG
    318 		p.From.Reg = x86.REG_F0
    319 		p.To.Type = obj.TYPE_REG
    320 		p.To.Reg = s.SSEto387[v.Reg()] + 1
    321 	} else {
    322 		// Don't actually pop value. This 387 register is now the
    323 		// new home for the not-yet-assigned-a-home SSE register.
    324 		// Increase the register mapping of all other registers by one.
    325 		for rSSE, r387 := range s.SSEto387 {
    326 			s.SSEto387[rSSE] = r387 + 1
    327 		}
    328 		s.SSEto387[r] = x86.REG_F0
    329 	}
    330 }
    331 
    332 // loadPush returns the opcode for load+push of the given type.
    333 func loadPush(t *types.Type) obj.As {
    334 	if t.Size() == 4 {
    335 		return x86.AFMOVF
    336 	}
    337 	return x86.AFMOVD
    338 }
    339 
    340 // flush387 removes all entries from the 387 floating-point stack.
    341 func flush387(s *gc.SSAGenState) {
    342 	for k := range s.SSEto387 {
    343 		p := s.Prog(x86.AFMOVDP)
    344 		p.From.Type = obj.TYPE_REG
    345 		p.From.Reg = x86.REG_F0
    346 		p.To.Type = obj.TYPE_REG
    347 		p.To.Reg = x86.REG_F0
    348 		delete(s.SSEto387, k)
    349 	}
    350 }
    351 
    352 func ssaGenBlock387(s *gc.SSAGenState, b, next *ssa.Block) {
    353 	// Empty the 387's FP stack before the block ends.
    354 	flush387(s)
    355 
    356 	ssaGenBlock(s, b, next)
    357 }
    358