; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd_ymm
  ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x double> %a0, %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps_ymm
  ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <8 x float> %a0, %a1
  ret <8 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_addsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd_ymm
  ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_addsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps_ymm
  ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd_ymm
  ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x double> %a0 to <4 x i64>
  %3 = bitcast <4 x double> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %5 = and <4 x i64> %4, %3
  %6 = bitcast <4 x i64> %5 to <4 x double>
  ; fadd forces execution domain
  %7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps_ymm
  ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <8 x float> %a0 to <4 x i64>
  %3 = bitcast <8 x float> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %5 = and <4 x i64> %4, %3
  %6 = bitcast <4 x i64> %5 to <8 x float>
  ; fadd forces execution domain
  %7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd_ymm
  ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x double> %a0 to <4 x i64>
  %3 = bitcast <4 x double> %a1 to <4 x i64>
  %4 = and <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <4 x double>
  ; fadd forces execution domain
  %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps_ymm
  ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <8 x float> %a0 to <4 x i64>
  %3 = bitcast <8 x float> %a1 to <4 x i64>
  %4 = and <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <8 x float>
  ; fadd forces execution domain
  %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK: vblendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_blendpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd_ymm
  ;CHECK: vblendpd $6, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x double> %a0, <4 x double> %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK: vblendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_blendps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps_ymm
  ;CHECK: vblendps $102, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %a0, <8 x float> %a1
  ret <8 x float> %2
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK: vblendvpd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_blendvpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd_ymm
  ;CHECK: vblendvpd {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a1, <4 x double> %c, <4 x double> %a0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK: vblendvps {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_blendvps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps_ymm
  ;CHECK: vblendvps {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a1, <8 x float> %c, <8 x float> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x double> @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd_ymm
  ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps_ymm
  ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: vcomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: vcomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}
define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_int
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x double>
  ret <4 x double> %2
}

define <4 x double> @stack_fold_cvtdq2pd_ymm_int(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm_int
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <8 x float> @stack_fold_cvtdq2ps_ymm(<8 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps_ymm
  ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <8 x i32> %a0 to <8 x float>
  ret <8 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: vcvtpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvtpd2dq_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq_ymm
  ;CHECK: vcvtpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: vcvtpd2psx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps_ymm
  ;CHECK: vcvtpd2psy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <4 x double> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x float> @stack_fold_cvtph2ps(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_cvtph2ps
  ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly

define <8 x float> @stack_fold_cvtph2ps_ymm(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_cvtph2ps_ymm
  ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <8 x i32> @stack_fold_cvtps2dq_ymm(<8 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq_ymm
  ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %3 = fpext <2 x float> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd_int
  ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd_ymm
  ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fpext <4 x float> %a0 to <4 x double>
  ret <4 x double> %2
}

define <4 x double> @stack_fold_cvtps2pd_ymm_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd_ymm_int
  ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone

define <8 x i16> @stack_fold_cvtps2ph(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2ph
  ;CHECK: vcvtps2ph $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
  %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly

define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2ph_ymm
  ;CHECK: vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
  %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2ss

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define double @stack_fold_cvtsi2sd(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define double @stack_fold_cvtsi642sd(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define float @stack_fold_cvtsi2ss(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @stack_fold_cvtsi642ss(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

; TODO stack_fold_cvtss2sd

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 685 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) 686 ret i64 %2 687 } 688 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 689 690 define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) { 691 ;CHECK-LABEL: stack_fold_cvttpd2dq 692 ;CHECK: vcvttpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 693 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 694 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) 695 ret <4 x i32> %2 696 } 697 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone 698 699 define <4 x i32> @stack_fold_cvttpd2dq_ymm(<4 x double> %a0) { 700 ;CHECK-LABEL: stack_fold_cvttpd2dq_ymm 701 ;CHECK: vcvttpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 702 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 703 %2 = fptosi <4 x double> %a0 to <4 x i32> 704 ret <4 x i32> %2 705 } 706 707 define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) { 708 ;CHECK-LABEL: stack_fold_cvttps2dq 709 ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 710 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 711 %2 = fptosi <4 x float> %a0 to <4 x i32> 712 ret <4 x i32> %2 713 } 714 715 define <8 x i32> @stack_fold_cvttps2dq_ymm(<8 x float> %a0) { 716 ;CHECK-LABEL: stack_fold_cvttps2dq_ymm 717 ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 718 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 719 %2 = fptosi <8 x float> %a0 to <8 x i32> 720 ret <8 x i32> %2 721 } 722 723 define i32 @stack_fold_cvttsd2si(double %a0) { 724 ;CHECK-LABEL: stack_fold_cvttsd2si 725 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload 726 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 727 %2 = fptosi double %a0 to i32 728 ret i32 %2 729 } 730 731 define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) { 732 ;CHECK-LABEL: stack_fold_cvttsd2si_int 733 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 734 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 735 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) 736 ret i32 %2 737 } 738 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone 739 740 define i64 @stack_fold_cvttsd2si64(double %a0) { 741 ;CHECK-LABEL: stack_fold_cvttsd2si64 742 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload 743 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 744 %2 = fptosi double %a0 to i64 745 ret i64 %2 746 } 747 748 define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) { 749 ;CHECK-LABEL: stack_fold_cvttsd2si64_int 750 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 751 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 752 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) 753 ret i64 %2 754 } 755 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone 756 757 define i32 @stack_fold_cvttss2si(float %a0) { 758 ;CHECK-LABEL: stack_fold_cvttss2si 759 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload 760 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 761 %2 = fptosi float %a0 to i32 762 ret i32 %2 763 } 764 765 define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) { 766 ;CHECK-LABEL: stack_fold_cvttss2si_int 767 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 768 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 769 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 770 ret i32 %2 771 } 772 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 773 774 define i64 @stack_fold_cvttss2si64(float %a0) { 775 ;CHECK-LABEL: stack_fold_cvttss2si64 776 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload 777 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 778 %2 = fptosi float %a0 to i64 779 ret i64 %2 780 } 781 782 define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) { 783 ;CHECK-LABEL: stack_fold_cvttss2si64_int 784 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 785 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 786 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) 787 ret i64 %2 788 } 789 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone 790 791 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) { 792 ;CHECK-LABEL: stack_fold_divpd 793 ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 794 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 795 %2 = fdiv <2 x double> %a0, %a1 796 ret <2 x double> %2 797 } 798 799 define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) { 800 ;CHECK-LABEL: stack_fold_divpd_ymm 801 ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 802 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 803 
  %2 = fdiv <4 x double> %a0, %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps_ymm
  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <8 x float> %a0, %a1
  ret <8 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: vdppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @stack_fold_dpps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps_ymm
  ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <4 x float> @stack_fold_extractf128(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_extractf128
  ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
  %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ret <4 x float> %1
}

define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: vextractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  %1 = extractelement <4 x float> %a0, i32 1
  %2 = bitcast float %1 to i32
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd_ymm
  ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 915 %2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) 916 ret <4 x double> %2 917 } 918 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone 919 920 define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) { 921 ;CHECK-LABEL: stack_fold_haddps 922 ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 923 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 924 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) 925 ret <4 x float> %2 926 } 927 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 928 929 define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) { 930 ;CHECK-LABEL: stack_fold_haddps_ymm 931 ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 932 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 933 %2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) 934 ret <8 x float> %2 935 } 936 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone 937 938 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) { 939 ;CHECK-LABEL: stack_fold_hsubpd 940 ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 941 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 942 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 943 ret <2 x double> %2 944 } 945 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 946 947 define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) { 948 ;CHECK-LABEL: stack_fold_hsubpd_ymm 949 ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 950 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 951 %2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) 952 ret <4 x double> %2 953 } 954 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 955 956 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 957 ;CHECK-LABEL: stack_fold_hsubps 958 ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 959 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 960 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 961 ret <4 x float> %2 962 } 963 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 964 965 define <8 x 
define i32 @stack_fold_extractps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_extractps
;CHECK: vextractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
%1 = extractelement <4 x float> %a0, i32 1
%2 = bitcast float %1 to i32
%3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret i32 %2
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_haddpd
;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_haddpd_ymm
;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_haddps
;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_haddps_ymm
;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_hsubpd
;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_hsubpd_ymm
;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_hsubps
;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_hsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_hsubps_ymm
;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_insertf128
;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x float> %2
}

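; The insertps immediate (209 = 0xD1) packs the source element, destination
; element and zero mask into one byte. When the source operand is folded to
; memory the source-element field is dropped (mem[0] is always used), which is
; why the CHECK expects $17; the CHECK-NEXT line shows the resulting lanes.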
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1014 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1015 ret <4 x float> %2 1016 } 1017 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1018 1019 define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) { 1020 ;CHECK-LABEL: stack_fold_maxps_ymm 1021 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1022 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1023 %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) 1024 ret <8 x float> %2 1025 } 1026 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 1027 1028 define double @stack_fold_maxsd(double %a0, double %a1) { 1029 ;CHECK-LABEL: stack_fold_maxsd 1030 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1031 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1032 %2 = fcmp ogt double %a0, %a1 1033 %3 = select i1 %2, double %a0, double %a1 1034 ret double %3 1035 } 1036 1037 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { 1038 ;CHECK-LABEL: stack_fold_maxsd_int 1039 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1040 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1041 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 1042 ret <2 x double> %2 1043 } 1044 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 1045 1046 define float @stack_fold_maxss(float %a0, float %a1) { 1047 ;CHECK-LABEL: stack_fold_maxss 1048 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1049 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1050 %2 = fcmp ogt float %a0, %a1 1051 %3 = select i1 %2, float %a0, float %a1 1052 ret float %3 1053 } 1054 1055 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { 1056 ;CHECK-LABEL: stack_fold_maxss_int 1057 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1058 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1059 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1060 ret <4 x float> %2 1061 } 1062 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1063 1064 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { 1065 ;CHECK-LABEL: stack_fold_minpd 1066 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1067 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1068 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 1069 ret <2 x double> %2 1070 } 1071 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 1072 1073 define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1074 ;CHECK-LABEL: stack_fold_minpd_ymm 1075 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1076 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1077 %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) 1078 ret <4 x double> %2 1079 } 1080 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 1081 1082 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 1083 ;CHECK-LABEL: stack_fold_minps 1084 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1085 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1086 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1087 ret <4 x float> %2 1088 } 1089 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1090 1091 define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) { 1092 ;CHECK-LABEL: stack_fold_minps_ymm 1093 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1094 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1095 %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) 1096 ret <8 x float> %2 1097 } 1098 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 1099 1100 define double @stack_fold_minsd(double %a0, double %a1) { 1101 ;CHECK-LABEL: stack_fold_minsd 1102 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1103 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1104 %2 = fcmp olt double %a0, %a1 1105 %3 = select i1 %2, double %a0, double %a1 1106 ret double %3 1107 } 1108 1109 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 1110 ;CHECK-LABEL: stack_fold_minsd_int 1111 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1112 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1113 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 1114 ret <2 x double> %2 1115 } 1116 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 1117 1118 define float @stack_fold_minss(float %a0, float %a1) { 1119 ;CHECK-LABEL: stack_fold_minss 1120 ;CHECK: vminss {{-?[0-9]*}}(%rsp), 
define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_movddup
;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %2
}

define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_movddup_ymm
;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x double> %2
}

; TODO stack_fold_movhpd (load / store)
; TODO stack_fold_movhps (load / store)

; TODO stack_fold_movlpd (load / store)
; TODO stack_fold_movlps (load / store)

define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_movshdup
;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
ret <4 x float> %2
}

define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_movshdup_ymm
;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x float> %2
}

define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_movsldup
;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x float> %2
}

define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_movsldup_ymm
;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x float> %2
}

define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_mulpd
;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <2 x double> %a0, %a1
ret <2 x double> %2
}

define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_mulpd_ymm
;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <4 x double> %a0, %a1
ret <4 x double> %2
}

define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_mulps
;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <4 x float> %a0, %a1
ret <4 x float> %2
}

define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_mulps_ymm
;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul <8 x float> %a0, %a1
ret <8 x float> %2
}

define double @stack_fold_mulsd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_mulsd
;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul double %a0, %a1
ret double %2
}

define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_mulsd_int
;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_mulss(float %a0, float %a1) {
;CHECK-LABEL: stack_fold_mulss
;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fmul float %a0, %a1
ret float %2
}

define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_mulss_int
;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone

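; The logic-op tests operate on bitcast integer vectors, so each one ends with
; an fadd of +0.0: this pins the result to the floating-point execution domain,
; otherwise the compiler could legally emit the integer-domain equivalent
; (e.g. vpor instead of vorpd) and the CHECK would not match.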
sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1178 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1179 ret <4 x float> %2 1180 } 1181 1182 define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) { 1183 ;CHECK-LABEL: stack_fold_movsldup_ymm 1184 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1185 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1186 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1187 ret <8 x float> %2 1188 } 1189 1190 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 1191 ;CHECK-LABEL: stack_fold_mulpd 1192 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1193 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1194 %2 = fmul <2 x double> %a0, %a1 1195 ret <2 x double> %2 1196 } 1197 1198 define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1199 ;CHECK-LABEL: stack_fold_mulpd_ymm 1200 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1201 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1202 %2 = fmul <4 x double> %a0, %a1 1203 ret <4 x double> %2 1204 } 1205 1206 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 1207 ;CHECK-LABEL: stack_fold_mulps 1208 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1209 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1210 %2 = fmul <4 x float> %a0, %a1 1211 ret <4 x float> %2 1212 } 1213 1214 define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) { 1215 ;CHECK-LABEL: stack_fold_mulps_ymm 1216 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1217 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1218 %2 = fmul <8 x float> %a0, %a1 1219 ret <8 x float> %2 1220 } 1221 1222 define double @stack_fold_mulsd(double %a0, double %a1) { 1223 ;CHECK-LABEL: stack_fold_mulsd 1224 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1225 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1226 %2 = fmul double %a0, %a1 1227 ret double %2 1228 } 1229 1230 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 1231 ;CHECK-LABEL: stack_fold_mulsd_int 1232 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1233 %1 = tail call <2 x i64> asm 
sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1234 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 1235 ret <2 x double> %2 1236 } 1237 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 1238 1239 define float @stack_fold_mulss(float %a0, float %a1) { 1240 ;CHECK-LABEL: stack_fold_mulss 1241 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1242 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1243 %2 = fmul float %a0, %a1 1244 ret float %2 1245 } 1246 1247 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 1248 ;CHECK-LABEL: stack_fold_mulss_int 1249 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1250 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1251 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 1252 ret <4 x float> %2 1253 } 1254 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 1255 1256 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 1257 ;CHECK-LABEL: stack_fold_orpd 1258 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1259 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1260 %2 = bitcast <2 x double> %a0 to <2 x i64> 1261 %3 = bitcast <2 x double> %a1 to <2 x i64> 1262 %4 = or <2 x i64> %2, %3 1263 %5 = bitcast <2 x i64> %4 to <2 x double> 1264 ; fadd forces execution domain 1265 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1266 ret <2 x double> %6 1267 } 1268 1269 define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1270 ;CHECK-LABEL: stack_fold_orpd_ymm 1271 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1272 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1273 %2 = bitcast <4 x double> %a0 to <4 x i64> 1274 %3 = bitcast <4 x double> %a1 to <4 x i64> 1275 %4 = or <4 x i64> %2, %3 1276 %5 = bitcast <4 x i64> %4 to <4 x double> 1277 ; fadd forces execution domain 1278 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0> 1279 ret <4 x double> %6 1280 } 1281 1282 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 1283 ;CHECK-LABEL: stack_fold_orps 1284 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1285 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1286 %2 = bitcast <4 x float> %a0 to <2 x i64> 1287 %3 = bitcast <4 x float> %a1 to <2 x i64> 1288 %4 = or <2 x i64> %2, %3 1289 %5 = bitcast <2 x i64> %4 to <4 x float> 1290 ; fadd forces execution domain 1291 %6 = fadd <4 x float> 
define <2 x double> @stack_fold_permilpd(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_permilpd
;CHECK: vpermilpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %2
}

define <4 x double> @stack_fold_permilpd_ymm(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_permilpd_ymm
;CHECK: vpermilpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x double> %2
}

define <2 x double> @stack_fold_permilpdvar(<2 x double> %a0, <2 x i64> %a1) {
;CHECK-LABEL: stack_fold_permilpdvar
;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone

define <4 x double> @stack_fold_permilpdvar_ymm(<4 x double> %a0, <4 x i64> %a1) {
;CHECK-LABEL: stack_fold_permilpdvar_ymm
;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone

define <4 x float> @stack_fold_permilps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_permilps
;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %2
}

define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_permilps_ymm
;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %2
}

define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_permilpsvar
;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone

define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_permilpsvar_ymm
;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone

; TODO stack_fold_rcpps

define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_rcpps_int
;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rcpps_ymm

define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_rcpps_ymm_int
;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone

; TODO stack_fold_rcpss

define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_rcpss_int
;CHECK: vrcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_roundpd
;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_roundpd_ymm
;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone

define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_roundps
;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_roundps_ymm
;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone

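; Scalar floor lowers to vroundsd/vroundss with immediate 9 (rounding mode 01,
; toward -inf, plus bit 3 to mask inexact exceptions); the optsize attribute
; appears to be needed here for the reload to be folded rather than loaded
; into a register first.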
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1345 %2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) 1346 ret <4 x double> %2 1347 } 1348 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 1349 1350 define <4 x float> @stack_fold_permilps(<4 x float> %a0) { 1351 ;CHECK-LABEL: stack_fold_permilps 1352 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1353 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1354 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1355 ret <4 x float> %2 1356 } 1357 1358 define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) { 1359 ;CHECK-LABEL: stack_fold_permilps_ymm 1360 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1361 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1362 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1363 ret <8 x float> %2 1364 } 1365 1366 define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) { 1367 ;CHECK-LABEL: stack_fold_permilpsvar 1368 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1369 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1370 %2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) 1371 ret <4 x float> %2 1372 } 1373 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 1374 1375 define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) { 1376 ;CHECK-LABEL: stack_fold_permilpsvar_ymm 1377 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1378 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1379 %2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) 1380 ret <8 x float> %2 1381 } 1382 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 1383 1384 ; TODO stack_fold_rcpps 1385 1386 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 1387 ;CHECK-LABEL: stack_fold_rcpps_int 1388 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1389 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1390 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1391 ret <4 x float> %2 1392 } 1393 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1394 1395 ; TODO stack_fold_rcpps_ymm 1396 1397 define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) { 1398 ;CHECK-LABEL: stack_fold_rcpps_ymm_int 1399 ;CHECK: vrcpps 
define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_shufpd
;CHECK: vshufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
ret <2 x double> %2
}

define <4 x double> @stack_fold_shufpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_shufpd_ymm
;CHECK: vshufpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
ret <4 x double> %2
}

define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_shufps
;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
ret <4 x float> %2
}

define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_shufps_ymm
;CHECK: vshufps $148, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 13, i32 14>
ret <8 x float> %2
}

define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_sqrtpd
;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_sqrtpd_ymm
;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_sqrtps
;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_sqrtps_ymm
;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

define double @stack_fold_sqrtsd(double %a0) {
;CHECK-LABEL: stack_fold_sqrtsd
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.sqrt.f64(double %a0)
ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone

define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_sqrtsd_int
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

define float @stack_fold_sqrtss(float %a0) {
;CHECK-LABEL: stack_fold_sqrtss
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call float @llvm.sqrt.f32(float %a0)
ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone

define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_sqrtss_int
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_subpd
;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <2 x double> %a0, %a1
ret <2 x double> %2
}

define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_subpd_ymm
;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <4 x double> %a0, %a1
ret <4 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_subps
;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <4 x float> %a0, %a1
ret <4 x float> %2
}

define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_subps_ymm
;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub <8 x float> %a0, %a1
ret <8 x float> %2
}

define double @stack_fold_subsd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_subsd
;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub double %a0, %a1
ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_subsd_int
;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_subss(float %a0, float %a1) {
;CHECK-LABEL: stack_fold_subss
;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fsub float %a0, %a1
ret float %2
}

define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_subss_int
;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

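; The llvm.x86.avx.vtestc.* intrinsics return the carry flag computed by
; vtestpd/vtestps; the tests check that the reload of the memory operand is
; folded into the compare itself.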
define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_testpd
;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_testpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_testpd_ymm
;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @stack_fold_testps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_testps
;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @stack_fold_testps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_testps_ymm
;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @stack_fold_ucomisd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_ucomisd
;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ueq double %a0, %a1
%3 = select i1 %2, i32 1, i32 -1
ret i32 %3
}

define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_ucomisd_int
;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %2
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_ucomiss(float %a0, float %a1) {
;CHECK-LABEL: stack_fold_ucomiss
;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = fcmp ueq float %a0, %a1
%3 = select i1 %2, i32 1, i32 -1
ret i32 %3
}

define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_ucomiss_int
;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %2
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_unpckhpd
;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
; fadd forces execution domain
%3 = fadd <2 x double> %2, <double 0x0, double 0x0>
ret <2 x double> %3
}

define <4 x double> @stack_fold_unpckhpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_unpckhpd_ymm
;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; fadd forces execution domain
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %3
}

define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_unpckhps
;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fadd forces execution domain
%3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %3
}

define <8 x float> @stack_fold_unpckhps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_unpckhps_ymm
;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
; fadd forces execution domain
%3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %3
}

define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_unpcklpd
;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
; fadd forces execution domain
%3 = fadd <2 x double> %2, <double 0x0, double 0x0>
ret <2 x double> %3
}

define <4 x double> @stack_fold_unpcklpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_unpcklpd_ymm
;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; fadd forces execution domain
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %3
}

define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_unpcklps
;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; fadd forces execution domain
%3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %3
}

define <8 x float> @stack_fold_unpcklps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_unpcklps_ymm
;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; fadd forces execution domain
%3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %3
}

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd
;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <2 x double> %a0 to <2 x i64>
%3 = bitcast <2 x double> %a1 to <2 x i64>
%4 = xor <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <2 x double>
; fadd forces execution domain
%6 = fadd <2 x double> %5, <double 0x0, double 0x0>
ret <2 x double> %6
}

define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd_ymm
;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x double> %a0 to <4 x i64>
%3 = bitcast <4 x double> %a1 to <4 x i64>
%4 = xor <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <4 x double>
; fadd forces execution domain
%6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %6
}

define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_xorps
;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <4 x float> %a0 to <2 x i64>
%3 = bitcast <4 x float> %a1 to <2 x i64>
%4 = xor <2 x i64> %2, %3
%5 = bitcast <2 x i64> %4 to <4 x float>
; fadd forces execution domain
%6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %6
}

define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_xorps_ymm
;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <8 x float> %a0 to <4 x i64>
%3 = bitcast <8 x float> %a1 to <4 x i64>
%4 = xor <4 x i64> %2, %3
%5 = bitcast <4 x i64> %4 to <8 x float>
; fadd forces execution domain
%6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %6
}