; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <2 x double> %a0, i32 0
  %3 = extractelement <2 x double> %a1, i32 0
  %4 = fadd double %2, %3
  %5 = insertelement <2 x double> %a0, double %4, i32 0
  ret <2 x double> %5
}

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a0, i32 0
  %3 = extractelement <4 x float> %a1, i32 0
  %4 = fadd float %2, %3
  %5 = insertelement <4 x float> %a0, float %4, i32 0
  ret <4 x float> %5
}

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK: blendvpd %xmm0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK: blendvps %xmm0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_int
  ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %cvt
}

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %3 = fpext <2 x float> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd_int
  ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
  %cvtps2pd = fpext <2 x float> %2 to <2 x double>
  ret <2 x double> %cvtps2pd
}

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define float @stack_fold_cvtsd2ss(double %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc double %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0, <2 x double> %b0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  %3 = insertelement <2 x double> %b0, double %2, i64 0
  ret <2 x double> %3
}

define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0, <2 x double> %b0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  %3 = insertelement <2 x double> %b0, double %2, i64 0
  ret <2 x double> %3
}

define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0, <4 x float> %b0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  %3 = insertelement <4 x float> %b0, float %2, i64 0
  ret <4 x float> %3
}

define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0, <4 x float> %b0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  %3 = insertelement <4 x float> %b0, float %2, i64 0
  ret <4 x float> %3
}

define double @stack_fold_cvtss2sd(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fpext float %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a0, i64 0
  %3 = fpext float %2 to double
  %4 = insertelement <2 x double> zeroinitializer, double %3, i64 0
  ret <2 x double> %4
}

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <2 x double> %a0, i32 0
  %3 = extractelement <2 x double> %a1, i32 0
  %4 = fdiv double %2, %3
  %5 = insertelement <2 x double> %a0, double %4, i32 0
  ret <2 x double> %5
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a0, i32 0
  %3 = extractelement <4 x float> %a1, i32 0
  %4 = fdiv float %2, %3
  %5 = insertelement <4 x float> %a0, float %4, i32 0
  ret <4 x float> %5
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  ; fadd forces execution domain
  %1 = fadd <4 x float> %a0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %2 = extractelement <4 x float> %1, i32 1
  %3 = bitcast float %2 to i32
  %4 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %3
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps
  ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float>
@llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) 653 ret <4 x float> %2 654 } 655 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 656 657 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) { 658 ;CHECK-LABEL: stack_fold_hsubpd 659 ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 660 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 661 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 662 ret <2 x double> %2 663 } 664 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 665 666 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 667 ;CHECK-LABEL: stack_fold_hsubps 668 ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 669 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 670 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 671 ret <4 x float> %2 672 } 673 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 674 675 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { 676 ;CHECK-LABEL: stack_fold_insertps 677 ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 678 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] 679 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 680 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 681 ret <4 x float> %2 682 } 683 declare <4 x float> 
@llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 684 685 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 { 686 ;CHECK-LABEL: stack_fold_maxpd 687 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 688 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 689 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 690 ret <2 x double> %2 691 } 692 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 693 694 define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { 695 ;CHECK-LABEL: stack_fold_maxpd_commutable 696 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 697 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 698 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 699 ret <2 x double> %2 700 } 701 702 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 { 703 ;CHECK-LABEL: stack_fold_maxps 704 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 705 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 706 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 707 ret <4 x float> %2 708 } 709 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 710 711 define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { 712 ;CHECK-LABEL: stack_fold_maxps_commutable 713 ;CHECK: maxps {{-?[0-9]*}}(%rsp), 
{{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 714 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 715 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 716 ret <4 x float> %2 717 } 718 719 define double @stack_fold_maxsd(double %a0, double %a1) #0 { 720 ;CHECK-LABEL: stack_fold_maxsd 721 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 722 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 723 %2 = fcmp ogt double %a0, %a1 724 %3 = select i1 %2, double %a0, double %a1 725 ret double %3 726 } 727 728 define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 { 729 ;CHECK-LABEL: stack_fold_maxsd_commutable 730 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 731 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 732 %2 = fcmp ogt double %a0, %a1 733 %3 = select i1 %2, double %a0, double %a1 734 ret double %3 735 } 736 737 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 { 738 ;CHECK-LABEL: stack_fold_maxsd_int 739 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 740 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 741 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 742 ret <2 x double> %2 743 } 744 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 745 746 define float @stack_fold_maxss(float %a0, float 
%a1) #0 { 747 ;CHECK-LABEL: stack_fold_maxss 748 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 749 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 750 %2 = fcmp ogt float %a0, %a1 751 %3 = select i1 %2, float %a0, float %a1 752 ret float %3 753 } 754 755 define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 { 756 ;CHECK-LABEL: stack_fold_maxss_commutable 757 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 758 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 759 %2 = fcmp ogt float %a0, %a1 760 %3 = select i1 %2, float %a0, float %a1 761 ret float %3 762 } 763 764 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 { 765 ;CHECK-LABEL: stack_fold_maxss_int 766 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 767 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 768 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 769 ret <4 x float> %2 770 } 771 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 772 773 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 { 774 ;CHECK-LABEL: stack_fold_minpd 775 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 776 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 777 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 778 
ret <2 x double> %2 779 } 780 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 781 782 define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { 783 ;CHECK-LABEL: stack_fold_minpd_commutable 784 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 785 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 786 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 787 ret <2 x double> %2 788 } 789 790 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 { 791 ;CHECK-LABEL: stack_fold_minps 792 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 793 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 794 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 795 ret <4 x float> %2 796 } 797 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 798 799 define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { 800 ;CHECK-LABEL: stack_fold_minps_commutable 801 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 802 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 803 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 804 ret <4 x float> %2 805 } 806 807 define double @stack_fold_minsd(double %a0, double %a1) #0 { 808 ;CHECK-LABEL: stack_fold_minsd 809 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 810 %1 = tail call <2 x i64> asm 
sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 811 %2 = fcmp olt double %a0, %a1 812 %3 = select i1 %2, double %a0, double %a1 813 ret double %3 814 } 815 816 define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 { 817 ;CHECK-LABEL: stack_fold_minsd_commutable 818 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 819 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 820 %2 = fcmp olt double %a0, %a1 821 %3 = select i1 %2, double %a0, double %a1 822 ret double %3 823 } 824 825 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0 { 826 ;CHECK-LABEL: stack_fold_minsd_int 827 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 828 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 829 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 830 ret <2 x double> %2 831 } 832 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 833 834 define float @stack_fold_minss(float %a0, float %a1) #0 { 835 ;CHECK-LABEL: stack_fold_minss 836 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 837 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 838 %2 = fcmp olt float %a0, %a1 839 %3 = select i1 %2, float %a0, float %a1 840 ret float %3 841 } 842 843 define float @stack_fold_minss_commutable(float %a0, float %a1) #1 { 844 ;CHECK-LABEL: stack_fold_minss_commutable 845 ;CHECK: minss 
{{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 846 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 847 %2 = fcmp olt float %a0, %a1 848 %3 = select i1 %2, float %a0, float %a1 849 ret float %3 850 } 851 852 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 { 853 ;CHECK-LABEL: stack_fold_minss_int 854 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 855 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 856 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 857 ret <4 x float> %2 858 } 859 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 860 861 define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 862 ;CHECK-LABEL: stack_fold_movddup 863 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 864 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 865 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 866 ret <2 x double> %2 867 } 868 ; TODO stack_fold_movhpd (load / store) 869 ; TODO stack_fold_movhps (load / store) 870 871 ; TODO stack_fold_movlpd (load / store) 872 ; TODO stack_fold_movlps (load / store) 873 874 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 875 ;CHECK-LABEL: stack_fold_movshdup 876 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 877 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 878 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 879 ret <4 x float> %2 880 } 881 882 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 883 ;CHECK-LABEL: stack_fold_movsldup 884 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 885 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 886 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 887 ret <4 x float> %2 888 } 889 890 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 891 ;CHECK-LABEL: stack_fold_mulpd 892 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 893 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 894 %2 = fmul <2 x double> %a0, %a1 895 ret <2 x double> %2 896 } 897 898 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 899 ;CHECK-LABEL: stack_fold_mulps 900 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 901 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 902 %2 = fmul <4 x float> %a0, %a1 903 ret <4 x float> %2 904 } 905 906 define double @stack_fold_mulsd(double %a0, double %a1) { 907 ;CHECK-LABEL: stack_fold_mulsd 908 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 909 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 910 %2 = fmul double %a0, %a1 911 ret double %2 912 } 913 914 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 915 ;CHECK-LABEL: stack_fold_mulsd_int 916 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 917 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 918 %2 = extractelement <2 x double> %a0, i32 0 919 %3 = extractelement <2 x double> %a1, i32 0 920 %4 = fmul double %2, %3 921 %5 = insertelement <2 x double> %a0, double %4, i32 0 922 ret <2 x double> %5 923 } 924 925 define float @stack_fold_mulss(float %a0, float %a1) { 926 ;CHECK-LABEL: stack_fold_mulss 927 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 928 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 929 %2 = fmul float %a0, %a1 930 ret float %2 931 } 932 933 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 934 ;CHECK-LABEL: stack_fold_mulss_int 935 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 936 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 937 %2 = extractelement <4 x float> %a0, i32 0 938 %3 = extractelement <4 x float> %a1, i32 0 939 %4 = fmul float %2, %3 940 %5 = insertelement <4 x float> %a0, float %4, i32 0 941 ret <4 x float> %5 942 } 943 944 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 945 ;CHECK-LABEL: stack_fold_orpd 946 ;CHECK: orpd {{-?[0-9]*}}(%rsp), 
{{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 947 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 948 %2 = bitcast <2 x double> %a0 to <2 x i64> 949 %3 = bitcast <2 x double> %a1 to <2 x i64> 950 %4 = or <2 x i64> %2, %3 951 %5 = bitcast <2 x i64> %4 to <2 x double> 952 ; fadd forces execution domain 953 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 954 ret <2 x double> %6 955 } 956 957 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 958 ;CHECK-LABEL: stack_fold_orps 959 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 960 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 961 %2 = bitcast <4 x float> %a0 to <2 x i64> 962 %3 = bitcast <4 x float> %a1 to <2 x i64> 963 %4 = or <2 x i64> %2, %3 964 %5 = bitcast <2 x i64> %4 to <4 x float> 965 ; fadd forces execution domain 966 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 967 ret <4 x float> %6 968 } 969 970 ; TODO stack_fold_rcpps 971 972 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 973 ;CHECK-LABEL: stack_fold_rcpps_int 974 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 975 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 976 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 977 ret <4 x float> %2 978 } 979 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 980 981 ; TODO stack_fold_rcpss 982 983 define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0, <4 x float> %a1) optsize { 984 ;CHECK-LABEL: stack_fold_rcpss_int 985 ;CHECK: rcpss 
{{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 986 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 987 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a1) 988 %3 = extractelement <4 x float> %2, i32 0 989 %4 = insertelement <4 x float> %a0, float %3, i32 0 990 ret <4 x float> %4 991 } 992 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) 993 994 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) { 995 ;CHECK-LABEL: stack_fold_roundpd 996 ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 997 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 998 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) 999 ret <2 x double> %2 1000 } 1001 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 1002 1003 define <4 x float> @stack_fold_roundps(<4 x float> %a0) { 1004 ;CHECK-LABEL: stack_fold_roundps 1005 ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1006 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1007 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) 1008 ret <4 x float> %2 1009 } 1010 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 1011 1012 define double @stack_fold_roundsd(double %a0) optsize { 1013 ;CHECK-LABEL: stack_fold_roundsd 1014 ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1015 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1016 %2 = call double @llvm.floor.f64(double %a0) 1017 ret double %2 1018 } 1019 declare double @llvm.floor.f64(double) nounwind readnone 1020 1021 define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize { 1022 ;CHECK-LABEL: stack_fold_roundsd_int 1023 ;CHECK: roundsd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1024 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1025 %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) 1026 ret <2 x double> %2 1027 } 1028 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 1029 1030 define float @stack_fold_roundss(float %a0) minsize { 1031 ;CHECK-LABEL: stack_fold_roundss 1032 ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1033 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1034 %2 = call float @llvm.floor.f32(float %a0) 1035 ret float %2 1036 } 1037 declare float @llvm.floor.f32(float) nounwind readnone 1038 1039 define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize { 1040 ;CHECK-LABEL: stack_fold_roundss_int 1041 ;CHECK: roundss $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1042 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1043 %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) 1044 ret <4 
x float> %2 1045 } 1046 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 1047 1048 ; TODO stack_fold_rsqrtps 1049 1050 define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) { 1051 ;CHECK-LABEL: stack_fold_rsqrtps_int 1052 ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1053 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1054 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 1055 ret <4 x float> %2 1056 } 1057 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1058 1059 ; TODO stack_fold_rsqrtss 1060 1061 define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize { 1062 ;CHECK-LABEL: stack_fold_rsqrtss_int 1063 ;CHECK: rsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1064 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1065 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a1) 1066 %3 = extractelement <4 x float> %2, i32 0 1067 %4 = insertelement <4 x float> %a0, float %3, i32 0 1068 ret <4 x float> %4 1069 } 1070 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) 1071 1072 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) { 1073 ;CHECK-LABEL: stack_fold_shufpd 1074 ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1075 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1076 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 1077 ; fadd forces execution domain 1078 %3 = fadd <2 
x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

; Each test below follows the same pattern (described in the file header):
; an inline-asm "nop" with sideeffect clobbers all xmm registers except the
; operands under test, forcing a spill, and the CHECK line verifies that the
; reload was folded into the instruction as a memory operand.

define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_shufps
;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask <0,2,4,7> lowers to shufps with immediate $200 (0b11001000), matching the CHECK line.
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
  ret <4 x float> %2
}

; The sqrt tests have a single xmm operand, so the asm also clobbers xmm1.

define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_sqrtpd
;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_sqrtps
;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

; NOTE(review): the scalar sqrt tests carry optsize/minsize attributes,
; presumably so the memory-operand form is preferred during folding — confirm
; against the folding-table logic before removing them.

define double @stack_fold_sqrtsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_sqrtsd
;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.sqrt.f64(double %a0)
  ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone

define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
;CHECK-LABEL: stack_fold_sqrtsd_int
;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Intrinsic result's low element is merged into %a0, matching sqrtsd's
  ; merge-into-destination semantics.
  %2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1)
  %3 = extractelement <2 x double> %2, i32 0
  %4 = insertelement <2 x double> %a0, double %3, i32 0
  ret <2 x double> %4
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

define float @stack_fold_sqrtss(float %a0) minsize {
;CHECK-LABEL: stack_fold_sqrtss
;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.sqrt.f32(float %a0)
  ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone

define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
;CHECK-LABEL: stack_fold_sqrtss_int
;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Scalar sqrt of %a1's low lane, merged into %a0 (sqrtss merge semantics).
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = call float @llvm.sqrt.f32(float %2)
  %4 = insertelement <4 x float> %a1, float %3, i64 0
  %5 = extractelement <4 x float> %4, i32 0
  %6 = insertelement <4 x float> %a0, float %5, i32 0
  ret <4 x float> %6
}

define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_subpd
;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_subps
;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_subsd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_subsd
;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_subsd_int
;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Scalar subtract of the low lanes, result merged into %a0.
  %2 = extractelement <2 x double> %a0, i32 0
  %3 = extractelement <2 x double> %a1, i32 0
  %4 = fsub double %2, %3
  %5 = insertelement <2 x double> %a0, double %4, i32 0
  ret <2 x double> %5
}

define float @stack_fold_subss(float %a0, float %a1) {
;CHECK-LABEL: stack_fold_subss
;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_subss_int
;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Scalar subtract of the low lanes, result merged into %a0.
  %2 = extractelement <4 x float> %a0, i32 0
  %3 = extractelement <4 x float> %a1, i32 0
  %4 = fsub float %2, %3
  %5 = insertelement <4 x float> %a0, float %4, i32 0
  ret <4 x float> %5
}

define i32 @stack_fold_ucomisd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_ucomisd
;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; fcmp ueq + select lowers to an unordered compare (ucomisd) and a flag read.
  %2 = fcmp ueq double %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_ucomisd_int
;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_ucomiss(float %a0, float %a1) {
;CHECK-LABEL: stack_fold_ucomiss
;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; fcmp ueq + select lowers to an unordered compare (ucomiss) and a flag read.
  %2 = fcmp ueq float %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_ucomiss_int
;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_unpckhpd
;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask <1,3> interleaves the high elements of the two sources (unpckhpd).
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_unpckhps
;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask <2,6,3,7> interleaves the high elements of the two sources (unpckhps).
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_unpcklpd
;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask <0,2> interleaves the low elements of the two sources (unpcklpd).
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_unpcklps
;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Mask <0,4,1,5> interleaves the low elements of the two sources (unpcklps).
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd
;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Integer xor on bitcast doubles; the trailing fadd keeps the operation in
  ; the floating-point domain so xorpd (not pxor) is selected.
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_xorps
;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Integer xor on bitcast floats; the trailing fadd keeps the operation in
  ; the floating-point domain so xorps (not pxor) is selected.
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

; NOTE(review): these attribute groups are not referenced by any function in
; this chunk — presumably used earlier in the file; verify before removing.
attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }