; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
;
; Each test's inline asm clobbers every xmm register except the one(s) needed to hold
; the live operands (and, for the cvtsi2*/extractps tests, clobbers the GPRs instead),
; so the register allocator must spill an operand and the folding is observable via
; the "Folded Reload" asm comment that FileCheck matches.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ret <4 x float> %2
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK: blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK: blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK: cvtsd2siq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define float @stack_fold_cvtsd2ss(double %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc double %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

define double @stack_fold_cvtss2sd(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fpext float %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  ; Unlike the other tests, the extract happens before the clobbering asm, so the
  ; checked pattern is a folded *spill* of the extracted lane rather than a reload.
  %1 = extractelement <4 x float> %a0, i32 1
  %2 = bitcast float %1 to i32
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps
  ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_hsubpd
  ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_hsubps
  ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

; NOTE(review): the source chunk is truncated here mid-definition; the fragment
; below is incomplete and continues past the end of this view.
define <4
x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { 641 ;CHECK-LABEL: stack_fold_insertps 642 ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 643 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] 644 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 645 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 646 ret <4 x float> %2 647 } 648 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 649 650 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { 651 ;CHECK-LABEL: stack_fold_maxpd 652 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 653 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 654 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 655 ret <2 x double> %2 656 } 657 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 658 659 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { 660 ;CHECK-LABEL: stack_fold_maxps 661 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 662 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 663 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 664 ret <4 x float> %2 665 } 666 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 667 668 define double @stack_fold_maxsd(double %a0, double %a1) { 669 ;CHECK-LABEL: stack_fold_maxsd 670 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), 
{{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 671 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 672 %2 = fcmp ogt double %a0, %a1 673 %3 = select i1 %2, double %a0, double %a1 674 ret double %3 675 } 676 677 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { 678 ;CHECK-LABEL: stack_fold_maxsd_int 679 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 680 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 681 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 682 ret <2 x double> %2 683 } 684 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 685 686 define float @stack_fold_maxss(float %a0, float %a1) { 687 ;CHECK-LABEL: stack_fold_maxss 688 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 689 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 690 %2 = fcmp ogt float %a0, %a1 691 %3 = select i1 %2, float %a0, float %a1 692 ret float %3 693 } 694 695 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { 696 ;CHECK-LABEL: stack_fold_maxss_int 697 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 698 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 699 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 700 ret <4 x float> %2 701 } 702 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) 
nounwind readnone 703 704 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { 705 ;CHECK-LABEL: stack_fold_minpd 706 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 707 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 708 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 709 ret <2 x double> %2 710 } 711 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 712 713 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 714 ;CHECK-LABEL: stack_fold_minps 715 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 716 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 717 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 718 ret <4 x float> %2 719 } 720 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 721 722 define double @stack_fold_minsd(double %a0, double %a1) { 723 ;CHECK-LABEL: stack_fold_minsd 724 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 725 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 726 %2 = fcmp olt double %a0, %a1 727 %3 = select i1 %2, double %a0, double %a1 728 ret double %3 729 } 730 731 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 732 ;CHECK-LABEL: stack_fold_minsd_int 733 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 734 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 735 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 736 ret <2 x double> %2 737 } 738 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 739 740 define float @stack_fold_minss(float %a0, float %a1) { 741 ;CHECK-LABEL: stack_fold_minss 742 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 743 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 744 %2 = fcmp olt float %a0, %a1 745 %3 = select i1 %2, float %a0, float %a1 746 ret float %3 747 } 748 749 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { 750 ;CHECK-LABEL: stack_fold_minss_int 751 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 753 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 754 ret <4 x float> %2 755 } 756 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 757 758 define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 759 ;CHECK-LABEL: stack_fold_movddup 760 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 761 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 762 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 763 ret <2 x double> %2 764 } 765 ; TODO stack_fold_movhpd (load / store) 766 ; TODO stack_fold_movhps (load / 
store) 767 768 ; TODO stack_fold_movlpd (load / store) 769 ; TODO stack_fold_movlps (load / store) 770 771 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 772 ;CHECK-LABEL: stack_fold_movshdup 773 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 774 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 775 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 776 ret <4 x float> %2 777 } 778 779 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 780 ;CHECK-LABEL: stack_fold_movsldup 781 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 782 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 783 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 784 ret <4 x float> %2 785 } 786 787 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 788 ;CHECK-LABEL: stack_fold_mulpd 789 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 790 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 791 %2 = fmul <2 x double> %a0, %a1 792 ret <2 x double> %2 793 } 794 795 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 796 ;CHECK-LABEL: stack_fold_mulps 797 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 798 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 799 %2 = fmul <4 
x float> %a0, %a1 800 ret <4 x float> %2 801 } 802 803 define double @stack_fold_mulsd(double %a0, double %a1) { 804 ;CHECK-LABEL: stack_fold_mulsd 805 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 806 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 807 %2 = fmul double %a0, %a1 808 ret double %2 809 } 810 811 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 812 ;CHECK-LABEL: stack_fold_mulsd_int 813 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 814 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 815 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 816 ret <2 x double> %2 817 } 818 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 819 820 define float @stack_fold_mulss(float %a0, float %a1) { 821 ;CHECK-LABEL: stack_fold_mulss 822 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 823 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 824 %2 = fmul float %a0, %a1 825 ret float %2 826 } 827 828 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 829 ;CHECK-LABEL: stack_fold_mulss_int 830 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 831 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 832 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 833 ret <4 x 
float> %2 834 } 835 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 836 837 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 838 ;CHECK-LABEL: stack_fold_orpd 839 ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 840 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 841 %2 = bitcast <2 x double> %a0 to <2 x i64> 842 %3 = bitcast <2 x double> %a1 to <2 x i64> 843 %4 = or <2 x i64> %2, %3 844 %5 = bitcast <2 x i64> %4 to <2 x double> 845 ; fadd forces execution domain 846 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 847 ret <2 x double> %6 848 } 849 850 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 851 ;CHECK-LABEL: stack_fold_orps 852 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 853 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 854 %2 = bitcast <4 x float> %a0 to <2 x i64> 855 %3 = bitcast <4 x float> %a1 to <2 x i64> 856 %4 = or <2 x i64> %2, %3 857 %5 = bitcast <2 x i64> %4 to <4 x float> 858 ; fadd forces execution domain 859 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 860 ret <4 x float> %6 861 } 862 863 ; TODO stack_fold_rcpps 864 865 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 866 ;CHECK-LABEL: stack_fold_rcpps_int 867 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 868 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 869 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 870 ret <4 x float> %2 871 } 872 
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rcpss
; TODO stack_fold_rcpss_int

define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_roundpd
  ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_roundps
  ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define double @stack_fold_roundsd(double %a0) optsize {
  ;CHECK-LABEL: stack_fold_roundsd
  ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.floor.f64(double %a0)
  ret double %2
}
declare double @llvm.floor.f64(double) nounwind readnone

; TODO stack_fold_roundsd_int
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define float @stack_fold_roundss(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_roundss
  ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.floor.f32(float %a0)
  ret float %2
}
declare float @llvm.floor.f32(float) nounwind readnone

; TODO stack_fold_roundss_int
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

; TODO stack_fold_rsqrtps

define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_rsqrtps_int
  ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rsqrtss
; TODO stack_fold_rsqrtss_int

define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_shufpd
  ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
  ret <2 x double> %2
}

define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_shufps
  ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
  ret <4 x float> %2
}

define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_sqrtpd
  ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_sqrtps
  ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

define double @stack_fold_sqrtsd(double %a0) optsize {
  ;CHECK-LABEL: stack_fold_sqrtsd
  ;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.sqrt.f64(double %a0)
  ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone

; TODO stack_fold_sqrtsd_int
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

define float @stack_fold_sqrtss(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_sqrtss
  ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.sqrt.f32(float %a0)
  ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone

; TODO stack_fold_sqrtss_int
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_subpd
  ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_subps
  ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_subsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_subsd
  ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_subsd_int
  ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_subss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_subss
  ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_subss_int
  ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @stack_fold_ucomisd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_ucomisd
  ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq double %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_ucomisd_int
  ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_ucomiss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_ucomiss
  ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq float %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_ucomiss_int
  ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpckhpd
  ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpckhps
  ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklpd
  ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklps
  ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_xorpd
  ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_xorps
  ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}