1 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s 2 3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4 target triple = "x86_64-unknown-unknown" 5 6 ; Stack reload folding tests. 7 ; 8 ; By including a nop call with sideeffects we can force a partial register spill of the 9 ; relevant registers and check that the reload is correctly folded into the instruction. 10 11 define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) { 12 ;CHECK-LABEL: stack_fold_addpd 13 ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 15 %2 = fadd <2 x double> %a0, %a1 16 ret <2 x double> %2 17 } 18 19 define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) { 20 ;CHECK-LABEL: stack_fold_addps 21 ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 22 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 23 %2 = fadd <4 x float> %a0, %a1 24 ret <4 x float> %2 25 } 26 27 define double @stack_fold_addsd(double %a0, double %a1) { 28 ;CHECK-LABEL: stack_fold_addsd 29 ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 30 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 31 %2 = fadd double %a0, %a1 32 ret double %2 33 } 34 35 define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) { 36 ;CHECK-LABEL: stack_fold_addsd_int 37 ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 38 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 39 %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) 40 ret <2 x double> %2 41 } 42 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone 43 44 define float @stack_fold_addss(float %a0, float %a1) { 45 ;CHECK-LABEL: stack_fold_addss 46 ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 47 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 48 %2 = fadd float %a0, %a1 49 ret float %2 50 } 51 52 define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) { 53 ;CHECK-LABEL: stack_fold_addss_int 54 ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 55 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 56 %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) 57 ret <4 x float> %2 58 } 59 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone 60 61 define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) { 62 ;CHECK-LABEL: stack_fold_addsubpd 63 ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 64 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 65 %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) 66 ret <2 x double> %2 67 } 68 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone 69 70 define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) { 71 ;CHECK-LABEL: stack_fold_addsubps 72 ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 73 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 74 %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) 75 ret <4 x float> %2 76 } 77 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone 78 79 define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) { 80 ;CHECK-LABEL: stack_fold_andnpd 81 ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 82 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 83 %2 = bitcast <2 x double> %a0 to <2 x i64> 84 %3 = bitcast <2 x double> %a1 to <2 x i64> 85 %4 = xor <2 x i64> %2, <i64 -1, i64 -1> 86 %5 = and <2 x i64> %4, %3 87 %6 = bitcast <2 x i64> %5 to <2 x double> 88 ; fadd forces execution domain 89 %7 = fadd <2 x double> %6, <double 0x0, double 0x0> 90 ret <2 x double> %7 91 } 92 93 define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) { 94 ;CHECK-LABEL: stack_fold_andnps 95 ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 96 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 97 %2 = bitcast <4 x float> %a0 to <2 x i64> 98 %3 = bitcast <4 x float> %a1 to <2 x i64> 99 %4 = xor <2 x i64> %2, <i64 -1, i64 -1> 100 %5 = and <2 x i64> %4, %3 101 %6 = bitcast <2 x i64> %5 to <4 x float> 102 ; fadd forces execution domain 103 %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0> 104 ret <4 x float> %7 105 } 106 107 define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) { 108 ;CHECK-LABEL: stack_fold_andpd 109 ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 110 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 111 %2 = bitcast <2 x double> %a0 to <2 x i64> 112 %3 = bitcast <2 x double> %a1 to <2 x i64> 113 %4 = and <2 x i64> %2, %3 114 %5 = bitcast <2 x i64> %4 to <2 x double> 115 ; fadd forces execution domain 116 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 117 ret <2 x double> %6 118 } 119 120 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) { 121 ;CHECK-LABEL: stack_fold_andps 122 ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 123 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 124 %2 = bitcast <4 x float> %a0 to <2 x i64> 125 %3 = bitcast <4 x float> %a1 to <2 x i64> 126 %4 = and <2 x i64> %2, %3 127 %5 = bitcast <2 x i64> %4 to <4 x float> 128 ; fadd forces execution domain 129 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 130 ret <4 x float> %6 131 } 132 133 define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) { 134 ;CHECK-LABEL: stack_fold_blendpd 135 ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 136 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 137 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1 138 ret <2 x double> %2 139 } 140 141 define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) { 142 ;CHECK-LABEL: stack_fold_blendps 143 ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 145 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1 146 ret <4 x float> %2 147 } 148 149 define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) { 150 ;CHECK-LABEL: stack_fold_blendvpd 151 ;CHECK: blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 152 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 153 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0) 154 ret <2 x double> %2 155 } 156 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 157 158 define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) { 159 ;CHECK-LABEL: stack_fold_blendvps 160 ;CHECK: blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 161 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 162 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0) 163 ret <4 x float> %2 164 } 165 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 166 167 define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) { 168 ;CHECK-LABEL: stack_fold_cmppd 169 ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 170 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 171 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0) 172 ret <2 x double> %2 173 } 174 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone 175 176 define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) { 177 ;CHECK-LABEL: stack_fold_cmpps 178 ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 179 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 180 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0) 181 ret <4 x float> %2 182 } 183 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 184 185 define i32 @stack_fold_cmpsd(double %a0, double %a1) { 186 ;CHECK-LABEL: stack_fold_cmpsd 187 ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 188 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 189 %2 = fcmp oeq double %a0, %a1 190 %3 = zext i1 %2 to i32 191 ret i32 %3 192 } 193 194 define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) { 195 ;CHECK-LABEL: stack_fold_cmpsd_int 196 ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 197 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 198 %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0) 199 ret <2 x double> %2 200 } 201 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone 202 203 define i32 @stack_fold_cmpss(float %a0, float %a1) { 204 ;CHECK-LABEL: stack_fold_cmpss 205 ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 206 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 207 %2 = fcmp oeq float %a0, %a1 208 %3 = zext i1 %2 to i32 209 ret i32 %3 210 } 211 212 define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) { 213 ;CHECK-LABEL: stack_fold_cmpss_int 214 ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 215 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 216 %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0) 217 ret <4 x float> %2 218 } 219 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 220 221 ; TODO stack_fold_comisd 222 223 define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) { 224 ;CHECK-LABEL: stack_fold_comisd_int 225 ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 226 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 227 %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) 228 ret i32 %2 229 } 230 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone 231 232 ; TODO stack_fold_comiss 233 234 define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) { 235 ;CHECK-LABEL: stack_fold_comiss_int 236 ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 237 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 238 %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) 239 ret i32 %2 240 } 241 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 242 243 define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) { 244 ;CHECK-LABEL: stack_fold_cvtdq2pd 245 ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 246 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 247 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 248 %3 = sitofp <2 x i32> %2 to <2 x double> 249 ret <2 x double> %3 250 } 251 252 define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) { 253 ;CHECK-LABEL: stack_fold_cvtdq2pd_int 254 ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 255 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 256 %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) 257 ret <2 x double> %2 258 } 259 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone 260 261 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) { 262 ;CHECK-LABEL: stack_fold_cvtdq2ps 263 ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 264 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 265 %2 = sitofp <4 x i32> %a0 to <4 x float> 266 ret <4 x float> %2 267 } 268 269 define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) { 270 ;CHECK-LABEL: stack_fold_cvtpd2dq 271 ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 272 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 273 %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) 274 ret <4 x i32> %2 275 } 276 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone 277 278 define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) { 279 ;CHECK-LABEL: stack_fold_cvtpd2ps 280 ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 281 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 282 %2 = fptrunc <2 x double> %a0 to <2 x float> 283 ret <2 x float> %2 284 } 285 286 define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) { 287 ;CHECK-LABEL: stack_fold_cvtps2dq 288 ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 289 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 290 %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) 291 ret <4 x i32> %2 292 } 293 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone 294 295 define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) { 296 ;CHECK-LABEL: stack_fold_cvtps2pd 297 ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 298 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 299 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1> 300 %3 = fpext <2 x float> %2 to <2 x double> 301 ret <2 x double> %3 302 } 303 304 define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) { 305 ;CHECK-LABEL: stack_fold_cvtps2pd_int 306 ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 307 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 308 %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) 309 ret <2 x double> %2 310 } 311 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone 312 313 ; TODO stack_fold_cvtsd2si 314 315 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) { 316 ;CHECK-LABEL: stack_fold_cvtsd2si_int 317 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 318 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 319 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) 320 ret i32 %2 321 } 322 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone 323 324 ; TODO stack_fold_cvtsd2si64 325 326 define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) { 327 ;CHECK-LABEL: stack_fold_cvtsd2si64_int 328 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 329 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 330 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) 331 ret i64 %2 332 } 333 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone 334 335 define float @stack_fold_cvtsd2ss(double %a0) minsize { 336 ;CHECK-LABEL: stack_fold_cvtsd2ss 337 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 338 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 339 %2 = fptrunc double %a0 to float 340 ret float %2 341 } 342 343 define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize { 344 ;CHECK-LABEL: stack_fold_cvtsd2ss_int 345 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 346 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 347 %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0) 348 ret <4 x float> %2 349 } 350 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone 351 352 define double @stack_fold_cvtsi2sd(i32 %a0) minsize { 353 ;CHECK-LABEL: stack_fold_cvtsi2sd 354 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 355 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 356 %2 = sitofp i32 %a0 to double 357 ret double %2 358 } 359 360 define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) { 361 ;CHECK-LABEL: stack_fold_cvtsi2sd_int 362 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 363 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 364 %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0) 365 ret <2 x double> %2 366 } 367 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone 368 369 define double @stack_fold_cvtsi642sd(i64 %a0) optsize { 370 ;CHECK-LABEL: stack_fold_cvtsi642sd 371 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 372 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 373 %2 = sitofp i64 %a0 to double 374 ret double %2 375 } 376 377 define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) { 378 ;CHECK-LABEL: stack_fold_cvtsi642sd_int 379 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 380 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 381 %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0) 382 ret <2 x double> %2 383 } 384 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone 385 386 define float @stack_fold_cvtsi2ss(i32 %a0) minsize { 387 ;CHECK-LABEL: stack_fold_cvtsi2ss 388 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 389 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 390 %2 = sitofp i32 %a0 to float 391 ret float %2 392 } 393 394 define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) { 395 ;CHECK-LABEL: stack_fold_cvtsi2ss_int 396 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 397 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 398 %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0) 399 ret <4 x float> %2 400 } 401 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone 402 403 define float @stack_fold_cvtsi642ss(i64 %a0) optsize { 404 ;CHECK-LABEL: stack_fold_cvtsi642ss 405 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 406 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 407 %2 = sitofp i64 %a0 to float 408 ret float %2 409 } 410 411 define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) { 412 ;CHECK-LABEL: stack_fold_cvtsi642ss_int 413 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 414 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 415 %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0) 416 ret <4 x float> %2 417 } 418 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone 419 420 define double @stack_fold_cvtss2sd(float %a0) minsize { 421 ;CHECK-LABEL: stack_fold_cvtss2sd 422 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 423 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 424 %2 = fpext float %a0 to double 425 ret double %2 426 } 427 428 define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize { 429 ;CHECK-LABEL: stack_fold_cvtss2sd_int 430 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 431 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 432 %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0) 433 ret <2 x double> %2 434 } 435 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone 436 437 ; TODO stack_fold_cvtss2si 438 439 define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) { 440 ;CHECK-LABEL: stack_fold_cvtss2si_int 441 ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 442 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 443 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 444 ret i32 %2 445 } 446 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 447 448 ; TODO stack_fold_cvtss2si64 449 450 define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) { 451 ;CHECK-LABEL: stack_fold_cvtss2si64_int 452 ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 453 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 454 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) 455 ret i64 %2 456 } 457 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 458 459 define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) { 460 ;CHECK-LABEL: stack_fold_cvttpd2dq 461 ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 462 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 463 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) 464 ret <4 x i32> %2 465 } 466 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone 467 468 define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) { 469 ;CHECK-LABEL: stack_fold_cvttps2dq 470 ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 471 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 472 %2 = fptosi <4 x float> %a0 to <4 x i32> 473 ret <4 x i32> %2 474 } 475 476 define i32 @stack_fold_cvttsd2si(double %a0) { 477 ;CHECK-LABEL: stack_fold_cvttsd2si 478 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload 479 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 480 %2 = fptosi double %a0 to i32 481 ret i32 %2 482 } 483 484 define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) { 485 ;CHECK-LABEL: stack_fold_cvttsd2si_int 486 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 487 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 488 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) 489 ret i32 %2 490 } 491 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone 492 493 define i64 @stack_fold_cvttsd2si64(double %a0) { 494 ;CHECK-LABEL: stack_fold_cvttsd2si64 495 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload 496 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 497 %2 = fptosi double %a0 to i64 498 ret i64 %2 499 } 500 501 define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) { 502 ;CHECK-LABEL: stack_fold_cvttsd2si64_int 503 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 504 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 505 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) 506 ret i64 %2 507 } 508 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone 509 510 define i32 @stack_fold_cvttss2si(float %a0) { 511 ;CHECK-LABEL: stack_fold_cvttss2si 512 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload 513 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 514 %2 = fptosi float %a0 to i32 515 ret i32 %2 516 } 517 518 define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) { 519 ;CHECK-LABEL: stack_fold_cvttss2si_int 520 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 521 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 522 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 523 ret i32 %2 524 } 525 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 526 527 define i64 @stack_fold_cvttss2si64(float %a0) { 528 ;CHECK-LABEL: stack_fold_cvttss2si64 529 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload 530 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 531 %2 = fptosi float %a0 to i64 532 ret i64 %2 533 } 534 535 define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) { 536 ;CHECK-LABEL: stack_fold_cvttss2si64_int 537 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 538 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 539 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) 540 ret i64 %2 541 } 542 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone 543 544 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) { 545 ;CHECK-LABEL: stack_fold_divpd 546 ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 547 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 548 %2 = fdiv <2 x double> %a0, %a1 549 ret <2 x double> %2 550 } 551 552 define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) { 553 ;CHECK-LABEL: stack_fold_divps 554 ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 555 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 556 %2 = fdiv <4 x float> %a0, %a1 557 ret <4 x float> %2 558 } 559 560 define double @stack_fold_divsd(double %a0, double %a1) { 561 ;CHECK-LABEL: stack_fold_divsd 562 ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 563 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 564 %2 = fdiv double %a0, %a1 565 ret double %2 566 } 567 568 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) { 569 ;CHECK-LABEL: stack_fold_divsd_int 570 ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 571 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 572 %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) 573 ret <2 x double> %2 574 } 575 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone 576 577 define float @stack_fold_divss(float %a0, float %a1) { 578 ;CHECK-LABEL: stack_fold_divss 579 ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 580 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 581 %2 = fdiv float %a0, %a1 582 ret float %2 583 } 584 585 define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) { 586 ;CHECK-LABEL: stack_fold_divss_int 587 ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 588 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 589 %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) 590 ret <4 x float> %2 591 } 592 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone 593 594 define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) { 595 ;CHECK-LABEL: stack_fold_dppd 596 ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 597 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 598 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) 599 ret <2 x double> %2 600 } 601 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone 602 603 define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) { 604 ;CHECK-LABEL: stack_fold_dpps 605 ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 606 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 607 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) 608 ret <4 x float> %2 609 } 610 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone 611 612 define i32 @stack_fold_extractps(<4 x float> %a0) { 613 ;CHECK-LABEL: stack_fold_extractps 614 ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill 615 ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload 616 %1 = extractelement <4 x float> %a0, i32 1 617 %2 = bitcast float %1 to i32 618 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 619 ret i32 %2 620 } 621 622 define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) { 623 ;CHECK-LABEL: stack_fold_haddpd 624 ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 625 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 626 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) 627 ret <2 x double> %2 628 } 629 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 630 631 define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) { 632 ;CHECK-LABEL: stack_fold_haddps 633 ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 634 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 635 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) 636 ret <4 x float> %2 637 } 638 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 639 640 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) { 641 ;CHECK-LABEL: stack_fold_hsubpd 642 ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 643 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 644 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 645 ret <2 x double> %2 646 } 647 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 648 649 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 650 ;CHECK-LABEL: stack_fold_hsubps 651 ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 652 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 653 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 654 ret <4 x float> %2 655 } 656 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 657 658 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { 659 ;CHECK-LABEL: stack_fold_insertps 660 ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 661 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] 662 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 663 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 664 ret <4 x float> %2 665 } 666 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 667 668 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { 669 ;CHECK-LABEL: stack_fold_maxpd 670 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 671 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 672 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 673 ret <2 x double> %2 674 } 675 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 676 677 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { 678 ;CHECK-LABEL: stack_fold_maxps 679 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 680 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 681 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 682 ret <4 x float> %2 683 } 684 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 685 686 define double @stack_fold_maxsd(double %a0, double %a1) { 687 ;CHECK-LABEL: stack_fold_maxsd 688 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 689 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 690 %2 = fcmp ogt double %a0, %a1 691 %3 = select i1 %2, double %a0, double %a1 692 ret double %3 693 } 694 695 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { 696 ;CHECK-LABEL: stack_fold_maxsd_int 697 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 698 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 699 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 700 ret <2 x double> %2 701 } 702 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 703 704 define float @stack_fold_maxss(float %a0, float %a1) { 705 ;CHECK-LABEL: stack_fold_maxss 706 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 707 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 708 %2 = fcmp ogt float %a0, %a1 709 %3 = select i1 %2, float %a0, float %a1 710 ret float %3 711 } 712 713 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { 714 ;CHECK-LABEL: stack_fold_maxss_int 715 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 716 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 717 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 718 ret <4 x float> %2 719 } 720 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 721 722 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { 723 ;CHECK-LABEL: stack_fold_minpd 724 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 725 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 726 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 727 ret <2 x double> %2 728 } 729 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 730 731 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 732 ;CHECK-LABEL: stack_fold_minps 733 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 734 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 735 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 736 ret <4 x float> %2 737 } 738 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 739 740 define double @stack_fold_minsd(double %a0, double %a1) { 741 ;CHECK-LABEL: stack_fold_minsd 742 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 743 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 744 %2 = fcmp olt double %a0, %a1 745 %3 = select i1 %2, double %a0, double %a1 746 ret double %3 747 } 748 749 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 750 ;CHECK-LABEL: stack_fold_minsd_int 751 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 753 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 754 ret <2 x double> %2 755 } 756 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 757 758 define float @stack_fold_minss(float %a0, float %a1) { 759 ;CHECK-LABEL: stack_fold_minss 760 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 761 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 762 %2 = fcmp olt float %a0, %a1 763 %3 = select i1 %2, float %a0, float %a1 764 ret float %3 765 } 766 767 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { 768 ;CHECK-LABEL: stack_fold_minss_int 769 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 770 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 771 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 772 ret <4 x float> %2 773 } 774 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 775 776 define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 777 ;CHECK-LABEL: stack_fold_movddup 778 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 779 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 780 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 781 ret <2 x double> %2 782 } 783 ; TODO stack_fold_movhpd (load / store) 784 ; TODO stack_fold_movhps (load / store) 785 786 ; TODO stack_fold_movlpd (load / store) 787 ; TODO stack_fold_movlps (load / store) 788 789 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 790 ;CHECK-LABEL: stack_fold_movshdup 791 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 792 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 793 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 794 ret <4 x float> %2 795 } 796 797 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 798 ;CHECK-LABEL: stack_fold_movsldup 799 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 800 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 801 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 802 ret <4 x float> %2 803 } 804 805 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 806 ;CHECK-LABEL: stack_fold_mulpd 807 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 808 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 809 %2 = fmul <2 x double> %a0, %a1 810 ret <2 x double> %2 811 } 812 813 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 814 ;CHECK-LABEL: stack_fold_mulps 815 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 816 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 817 %2 = fmul <4 x float> %a0, %a1 818 ret <4 x float> %2 819 } 820 821 define double @stack_fold_mulsd(double %a0, double %a1) { 822 ;CHECK-LABEL: stack_fold_mulsd 823 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 824 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 825 %2 = fmul double %a0, %a1 826 ret double %2 827 } 828 829 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 830 ;CHECK-LABEL: stack_fold_mulsd_int 831 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 832 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 833 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 834 ret <2 x double> %2 835 } 836 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 837 838 define float @stack_fold_mulss(float %a0, float %a1) { 839 ;CHECK-LABEL: stack_fold_mulss 840 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 841 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 842 %2 = fmul float %a0, %a1 843 ret float %2 844 } 845 846 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 847 ;CHECK-LABEL: stack_fold_mulss_int 848 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 849 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 850 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 851 ret <4 x float> %2 852 } 853 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 854 855 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 856 ;CHECK-LABEL: stack_fold_orpd 857 ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 858 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 859 %2 = bitcast <2 x double> %a0 to <2 x i64> 860 %3 = bitcast <2 x double> %a1 to <2 x i64> 861 %4 = or <2 x i64> %2, %3 862 %5 = bitcast <2 x i64> %4 to <2 x double> 863 ; fadd forces execution domain 864 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 865 ret <2 x double> %6 866 } 867 868 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 869 ;CHECK-LABEL: stack_fold_orps 870 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 871 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 872 %2 = bitcast <4 x float> %a0 to <2 x i64> 873 %3 = bitcast <4 x float> %a1 to <2 x i64> 874 %4 = or <2 x i64> %2, %3 875 %5 = bitcast <2 x i64> %4 to <4 x float> 876 ; fadd forces execution domain 877 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 878 ret <4 x float> %6 879 } 880 881 ; TODO stack_fold_rcpps 882 883 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 884 ;CHECK-LABEL: stack_fold_rcpps_int 885 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 886 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 887 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 888 ret <4 x float> %2 889 } 890 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 891 892 ; TODO stack_fold_rcpss 893 ; TODO stack_fold_rcpss_int 894 895 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) { 896 ;CHECK-LABEL: stack_fold_roundpd 897 ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 898 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 899 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) 900 ret <2 x double> %2 901 } 902 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 903 904 define <4 x float> @stack_fold_roundps(<4 x float> %a0) { 905 ;CHECK-LABEL: stack_fold_roundps 906 ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 907 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 908 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) 909 ret <4 x float> %2 910 } 911 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 912 913 define double @stack_fold_roundsd(double %a0) optsize { 914 ;CHECK-LABEL: stack_fold_roundsd 915 ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 916 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 917 %2 = call double @llvm.floor.f64(double %a0) 918 ret double %2 919 } 920 declare double @llvm.floor.f64(double) nounwind readnone 921 922 ; TODO stack_fold_roundsd_int 923 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 924 925 define float @stack_fold_roundss(float %a0) minsize { 926 ;CHECK-LABEL: stack_fold_roundss 927 ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 928 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 929 %2 = call float @llvm.floor.f32(float %a0) 930 ret float %2 931 } 932 declare float @llvm.floor.f32(float) nounwind readnone 933 934 ; TODO stack_fold_roundss_int 935 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 936 937 ; TODO stack_fold_rsqrtps 938 939 define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) { 940 ;CHECK-LABEL: stack_fold_rsqrtps_int 941 ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 942 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 943 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 944 ret <4 x float> %2 945 } 946 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 947 948 ; TODO stack_fold_rsqrtss 949 ; TODO stack_fold_rsqrtss_int 950 951 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) { 952 ;CHECK-LABEL: stack_fold_shufpd 953 ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 954 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 955 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 956 ret <2 x double> %2 957 } 958 959 define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) { 960 ;CHECK-LABEL: stack_fold_shufps 961 ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 962 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 963 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> 964 ret <4 x float> %2 965 } 966 967 define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) { 968 ;CHECK-LABEL: stack_fold_sqrtpd 969 ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 970 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 971 %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) 972 ret <2 x double> %2 973 } 974 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 975 976 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) { 977 ;CHECK-LABEL: stack_fold_sqrtps 978 ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 979 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 980 %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) 981 ret <4 x float> %2 982 } 983 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone 984 985 define double @stack_fold_sqrtsd(double %a0) optsize { 986 ;CHECK-LABEL: stack_fold_sqrtsd 987 ;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 988 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 989 %2 = call double @llvm.sqrt.f64(double %a0) 990 ret double %2 991 } 992 declare double @llvm.sqrt.f64(double) nounwind readnone 993 994 ; TODO stack_fold_sqrtsd_int 995 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 996 997 define float @stack_fold_sqrtss(float %a0) minsize { 998 ;CHECK-LABEL: stack_fold_sqrtss 999 ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1000 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1001 %2 = call float @llvm.sqrt.f32(float %a0) 1002 ret float %2 1003 } 1004 declare float @llvm.sqrt.f32(float) nounwind readnone 1005 1006 ; TODO stack_fold_sqrtss_int 1007 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone 1008 1009 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) { 1010 ;CHECK-LABEL: stack_fold_subpd 1011 ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1012 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1013 %2 = fsub <2 x double> %a0, %a1 1014 ret <2 x double> %2 1015 } 1016 1017 define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) { 1018 ;CHECK-LABEL: stack_fold_subps 1019 ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1020 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1021 %2 = fsub <4 x float> %a0, %a1 1022 ret <4 x float> %2 1023 } 1024 1025 define double @stack_fold_subsd(double %a0, double %a1) { 1026 ;CHECK-LABEL: stack_fold_subsd 1027 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1028 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1029 %2 = fsub double %a0, %a1 1030 ret double %2 1031 } 1032 1033 define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) { 1034 ;CHECK-LABEL: stack_fold_subsd_int 1035 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1036 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1037 %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) 1038 ret <2 x double> %2 1039 } 1040 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone 1041 1042 define float @stack_fold_subss(float %a0, float %a1) { 1043 ;CHECK-LABEL: stack_fold_subss 1044 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1045 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1046 %2 = fsub float %a0, %a1 1047 ret float %2 1048 } 1049 1050 define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) { 1051 ;CHECK-LABEL: stack_fold_subss_int 1052 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1053 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1054 %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) 1055 ret <4 x float> %2 1056 } 1057 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone 1058 1059 define i32 @stack_fold_ucomisd(double %a0, double %a1) { 1060 ;CHECK-LABEL: stack_fold_ucomisd 1061 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1062 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1063 %2 = fcmp ueq double %a0, %a1 1064 %3 = select i1 %2, i32 1, i32 -1 1065 ret i32 %3 1066 } 1067 1068 define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) { 1069 ;CHECK-LABEL: stack_fold_ucomisd_int 1070 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1071 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1072 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 1073 ret i32 %2 1074 } 1075 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone 1076 1077 define i32 @stack_fold_ucomiss(float %a0, float %a1) { 1078 ;CHECK-LABEL: stack_fold_ucomiss 1079 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1080 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1081 %2 = fcmp ueq float %a0, %a1 1082 %3 = select i1 %2, i32 1, i32 -1 1083 ret i32 %3 1084 } 1085 1086 define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) { 1087 ;CHECK-LABEL: stack_fold_ucomiss_int 1088 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1089 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1090 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 1091 ret i32 %2 1092 } 1093 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 1094 1095 define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) { 1096 ;CHECK-LABEL: stack_fold_unpckhpd 1097 ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1098 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1099 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 1100 ; fadd forces execution domain 1101 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 1102 ret <2 x double> %3 1103 } 1104 1105 define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) { 1106 ;CHECK-LABEL: stack_fold_unpckhps 1107 ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1108 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1109 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1110 ; fadd forces execution domain 1111 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 1112 ret <4 x float> %3 1113 } 1114 1115 define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) { 1116 ;CHECK-LABEL: stack_fold_unpcklpd 1117 ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1118 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1119 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> 1120 ; fadd forces execution domain 1121 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 1122 ret <2 x double> %3 1123 } 1124 1125 define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) { 1126 ;CHECK-LABEL: stack_fold_unpcklps 1127 ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1128 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1129 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1130 ; fadd forces execution domain 1131 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 1132 ret <4 x float> %3 1133 } 1134 1135 define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) { 1136 ;CHECK-LABEL: stack_fold_xorpd 1137 ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1138 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1139 %2 = bitcast <2 x double> %a0 to <2 x i64> 1140 %3 = bitcast <2 x double> %a1 to <2 x i64> 1141 %4 = xor <2 x i64> %2, %3 1142 %5 = bitcast <2 x i64> %4 to <2 x double> 1143 ; fadd forces execution domain 1144 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1145 ret <2 x double> %6 1146 } 1147 1148 define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) { 1149 ;CHECK-LABEL: stack_fold_xorps 1150 ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1151 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1152 %2 = bitcast <4 x float> %a0 to <2 x i64> 1153 %3 = bitcast <4 x float> %a1 to <2 x i64> 1154 %4 = xor <2 x i64> %2, %3 1155 %5 = bitcast <2 x i64> %4 to <4 x float> 1156 ; fadd forces execution domain 1157 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 1158 ret <4 x float> %6 1159 } 1160