; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd_ymm
  ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x double> %a0, %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps_ymm
  ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <8 x float> %a0, %a1
  ret <8 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_addsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd_ymm
  ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_addsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps_ymm
  ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd_ymm
  ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x double> %a0 to <4 x i64>
  %3 = bitcast <4 x double> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %5 = and <4 x i64> %4, %3
  %6 = bitcast <4 x i64> %5 to <4 x double>
  ; fadd forces execution domain
  %7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps_ymm
  ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <8 x float> %a0 to <4 x i64>
  %3 = bitcast <8 x float> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %5 = and <4 x i64> %4, %3
  %6 = bitcast <4 x i64> %5 to <8 x float>
  ; fadd forces execution domain
  %7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd_ymm
  ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x double> %a0 to <4 x i64>
  %3 = bitcast <4 x double> %a1 to <4 x i64>
  %4 = and <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <4 x double>
  ; fadd forces execution domain
  %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps_ymm
  ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <8 x float> %a0 to <4 x i64>
  %3 = bitcast <8 x float> %a1 to <4 x i64>
  %4 = and <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <8 x float>
  ; fadd forces execution domain
  %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK: vblendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_blendpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd_ymm
  ;CHECK: vblendpd $6, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x double> %a0, <4 x double> %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK: vblendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_blendps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps_ymm
  ;CHECK: vblendps $102, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %a0, <8 x float> %a1
  ret <8 x float> %2
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK: vblendvpd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_blendvpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd_ymm
  ;CHECK: vblendvpd {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a1, <4 x double> %c, <4 x double> %a0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK: vblendvps {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_blendvps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps_ymm
  ;CHECK: vblendvps {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a1, <8 x float> %c, <8 x float> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x double> @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd_ymm
  ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps_ymm
  ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: vcomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: vcomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <8 x float> @stack_fold_cvtdq2ps_ymm(<8 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps_ymm
  ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <8 x i32> %a0 to <8 x float>
  ret <8 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: vcvtpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvtpd2dq_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq_ymm
  ;CHECK: vcvtpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: vcvtpd2psx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps_ymm
  ;CHECK: vcvtpd2psy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <4 x double> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x float> @stack_fold_cvtph2ps(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_cvtph2ps
  ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly

define <8 x float> @stack_fold_cvtph2ps_ymm(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_cvtph2ps_ymm
  ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <8 x i32> @stack_fold_cvtps2dq_ymm(<8 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq_ymm
  ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd_ymm
  ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone

define <8 x i16> @stack_fold_cvtps2ph(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2ph
  ;CHECK: vcvtps2ph $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
  %1 = call
<8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) 509 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 510 ret <8 x i16> %1 511 } 512 declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly 513 514 define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) { 515 ;CHECK-LABEL: stack_fold_cvtps2ph_ymm 516 ;CHECK: vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill 517 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) 518 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 519 ret <8 x i16> %1 520 } 521 declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly 522 523 ; TODO stack_fold_cvtsd2si 524 525 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) { 526 ;CHECK-LABEL: stack_fold_cvtsd2si_int 527 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 528 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 529 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) 530 ret i32 %2 531 } 532 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone 533 534 ; TODO stack_fold_cvtsd2si64 535 536 define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) { 537 ;CHECK-LABEL: stack_fold_cvtsd2si64_int 538 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 540 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) 541 ret i64 %2 
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2ss

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define double @stack_fold_cvtsi2sd(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define double @stack_fold_cvtsi642sd(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define float @stack_fold_cvtsi2ss(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @stack_fold_cvtsi642ss(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

; TODO stack_fold_cvtss2sd

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: vcvttpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq_ymm
  ;CHECK: vcvttpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x double> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i32> @stack_fold_cvttps2dq_ymm(<8 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq_ymm
  ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <8 x float> %a0 to <8 x i32>
  ret <8 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd_ymm
  ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x double> %a0, %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps_ymm
  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <8 x float> %a0, %a1
  ret <8 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: vdppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @stack_fold_dpps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps_ymm
  ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <4 x float> @stack_fold_extractf128(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_extractf128
  ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
  %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ret <4 x float> %1
}

define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: vextractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  %1 = extractelement <4 x float> %a0, i32 1
  %2 = bitcast float %1 to i32
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd_ymm
  ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps
  ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps_ymm
  ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_hsubpd
  ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_hsubpd_ymm
  ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_hsubps
  ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_hsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_hsubps_ymm
  ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_insertf128
  ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %2
}

define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_insertps
  ;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_maxpd
  ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_maxpd_ymm
  ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_maxps
  ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_maxps_ymm
  ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone

define double @stack_fold_maxsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_maxsd
  ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt double %a0, %a1
  %3 = select i1 %2, double %a0, double %a1
  ret double %3
}

define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_maxsd_int
  ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_maxss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_maxss
  ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ogt float %a0, %a1
  %3 = select i1 %2, float %a0, float %a1
  ret float %3
}

define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_maxss_int
  ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_minpd
  ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_minpd_ymm
1042 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1043 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1044 %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) 1045 ret <4 x double> %2 1046 } 1047 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 1048 1049 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 1050 ;CHECK-LABEL: stack_fold_minps 1051 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1052 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1053 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1054 ret <4 x float> %2 1055 } 1056 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1057 1058 define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) { 1059 ;CHECK-LABEL: stack_fold_minps_ymm 1060 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1061 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1062 %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) 1063 ret <8 x float> %2 1064 } 1065 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 1066 1067 define double @stack_fold_minsd(double %a0, double %a1) { 1068 ;CHECK-LABEL: stack_fold_minsd 1069 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1070 %1 = 
tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1071 %2 = fcmp olt double %a0, %a1 1072 %3 = select i1 %2, double %a0, double %a1 1073 ret double %3 1074 } 1075 1076 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 1077 ;CHECK-LABEL: stack_fold_minsd_int 1078 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1079 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1080 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 1081 ret <2 x double> %2 1082 } 1083 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 1084 1085 define float @stack_fold_minss(float %a0, float %a1) { 1086 ;CHECK-LABEL: stack_fold_minss 1087 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1088 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1089 %2 = fcmp olt float %a0, %a1 1090 %3 = select i1 %2, float %a0, float %a1 1091 ret float %3 1092 } 1093 1094 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { 1095 ;CHECK-LABEL: stack_fold_minss_int 1096 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1097 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1098 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1099 ret <4 x float> %2 1100 } 1101 declare <4 x float> 
@llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1102 1103 define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 1104 ;CHECK-LABEL: stack_fold_movddup 1105 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1106 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1107 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1108 ret <2 x double> %2 1109 } 1110 1111 define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) { 1112 ;CHECK-LABEL: stack_fold_movddup_ymm 1113 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1114 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1115 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1116 ret <4 x double> %2 1117 } 1118 1119 ; TODO stack_fold_movhpd (load / store) 1120 ; TODO stack_fold_movhps (load / store) 1121 1122 ; TODO stack_fold_movlpd (load / store) 1123 ; TODO stack_fold_movlps (load / store) 1124 1125 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 1126 ;CHECK-LABEL: stack_fold_movshdup 1127 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1128 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1129 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 1130 ret <4 x float> %2 1131 } 1132 1133 define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) { 1134 ;CHECK-LABEL: stack_fold_movshdup_ymm 1135 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), 
{{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1136 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1137 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1138 ret <8 x float> %2 1139 } 1140 1141 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 1142 ;CHECK-LABEL: stack_fold_movsldup 1143 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1145 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1146 ret <4 x float> %2 1147 } 1148 1149 define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) { 1150 ;CHECK-LABEL: stack_fold_movsldup_ymm 1151 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1152 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1153 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1154 ret <8 x float> %2 1155 } 1156 1157 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 1158 ;CHECK-LABEL: stack_fold_mulpd 1159 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1160 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1161 %2 = fmul <2 x double> %a0, %a1 1162 ret <2 x double> %2 1163 } 1164 1165 define <4 
x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1166 ;CHECK-LABEL: stack_fold_mulpd_ymm 1167 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1168 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1169 %2 = fmul <4 x double> %a0, %a1 1170 ret <4 x double> %2 1171 } 1172 1173 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 1174 ;CHECK-LABEL: stack_fold_mulps 1175 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1176 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1177 %2 = fmul <4 x float> %a0, %a1 1178 ret <4 x float> %2 1179 } 1180 1181 define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) { 1182 ;CHECK-LABEL: stack_fold_mulps_ymm 1183 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1184 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1185 %2 = fmul <8 x float> %a0, %a1 1186 ret <8 x float> %2 1187 } 1188 1189 define double @stack_fold_mulsd(double %a0, double %a1) { 1190 ;CHECK-LABEL: stack_fold_mulsd 1191 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1192 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1193 %2 = fmul double %a0, %a1 1194 ret double %2 1195 } 1196 1197 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) 
{ 1198 ;CHECK-LABEL: stack_fold_mulsd_int 1199 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1200 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1201 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 1202 ret <2 x double> %2 1203 } 1204 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 1205 1206 define float @stack_fold_mulss(float %a0, float %a1) { 1207 ;CHECK-LABEL: stack_fold_mulss 1208 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1209 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1210 %2 = fmul float %a0, %a1 1211 ret float %2 1212 } 1213 1214 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 1215 ;CHECK-LABEL: stack_fold_mulss_int 1216 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1217 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1218 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 1219 ret <4 x float> %2 1220 } 1221 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 1222 1223 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 1224 ;CHECK-LABEL: stack_fold_orpd 1225 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1226 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1227 %2 = bitcast <2 x double> %a0 to <2 x i64> 1228 %3 = bitcast <2 x double> %a1 to <2 x i64> 1229 %4 = or <2 x i64> %2, %3 1230 %5 = bitcast <2 x i64> %4 to <2 x double> 1231 ; fadd forces execution domain 1232 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1233 ret <2 x double> %6 1234 } 1235 1236 define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1237 ;CHECK-LABEL: stack_fold_orpd_ymm 1238 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1239 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1240 %2 = bitcast <4 x double> %a0 to <4 x i64> 1241 %3 = bitcast <4 x double> %a1 to <4 x i64> 1242 %4 = or <4 x i64> %2, %3 1243 %5 = bitcast <4 x i64> %4 to <4 x double> 1244 ; fadd forces execution domain 1245 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0> 1246 ret <4 x double> %6 1247 } 1248 1249 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 1250 ;CHECK-LABEL: stack_fold_orps 1251 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1252 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1253 %2 = bitcast <4 x float> %a0 to <2 x i64> 1254 %3 = bitcast <4 x float> %a1 to <2 x i64> 1255 %4 = or <2 x i64> %2, %3 1256 %5 = bitcast <2 x i64> %4 to <4 x float> 1257 ; fadd forces execution domain 1258 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 1259 ret <4 x float> %6 1260 } 1261 1262 define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) { 1263 
;CHECK-LABEL: stack_fold_orps_ymm 1264 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1265 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1266 %2 = bitcast <8 x float> %a0 to <4 x i64> 1267 %3 = bitcast <8 x float> %a1 to <4 x i64> 1268 %4 = or <4 x i64> %2, %3 1269 %5 = bitcast <4 x i64> %4 to <8 x float> 1270 ; fadd forces execution domain 1271 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 1272 ret <8 x float> %6 1273 } 1274 1275 define <8 x float> @stack_fold_perm2f128(<8 x float> %a0, <8 x float> %a1) { 1276 ;CHECK-LABEL: stack_fold_perm2f128 1277 ;CHECK: vperm2f128 $33, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1278 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1279 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1280 ret <8 x float> %2 1281 } 1282 1283 define <2 x double> @stack_fold_permilpd(<2 x double> %a0) { 1284 ;CHECK-LABEL: stack_fold_permilpd 1285 ;CHECK: vpermilpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1286 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1287 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1288 ret <2 x double> %2 1289 } 1290 1291 define <4 x double> @stack_fold_permilpd_ymm(<4 x double> %a0) { 1292 ;CHECK-LABEL: stack_fold_permilpd_ymm 1293 ;CHECK: vpermilpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 
32-byte Folded Reload 1294 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1295 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> 1296 ret <4 x double> %2 1297 } 1298 1299 define <2 x double> @stack_fold_permilpdvar(<2 x double> %a0, <2 x i64> %a1) { 1300 ;CHECK-LABEL: stack_fold_permilpdvar 1301 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1302 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1303 %2 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) 1304 ret <2 x double> %2 1305 } 1306 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 1307 1308 define <4 x double> @stack_fold_permilpdvar_ymm(<4 x double> %a0, <4 x i64> %a1) { 1309 ;CHECK-LABEL: stack_fold_permilpdvar_ymm 1310 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1311 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1312 %2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) 1313 ret <4 x double> %2 1314 } 1315 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 1316 1317 define <4 x float> @stack_fold_permilps(<4 x float> %a0) { 1318 ;CHECK-LABEL: stack_fold_permilps 1319 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1320 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1321 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1322 ret <4 x float> %2 1323 } 1324 1325 define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) { 1326 ;CHECK-LABEL: stack_fold_permilps_ymm 1327 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1328 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1329 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1330 ret <8 x float> %2 1331 } 1332 1333 define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) { 1334 ;CHECK-LABEL: stack_fold_permilpsvar 1335 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1336 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1337 %2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) 1338 ret <4 x float> %2 1339 } 1340 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 1341 1342 define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) { 1343 ;CHECK-LABEL: stack_fold_permilpsvar_ymm 1344 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1345 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1346 %2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 
x i32> %a1) 1347 ret <8 x float> %2 1348 } 1349 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 1350 1351 ; TODO stack_fold_rcpps 1352 1353 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 1354 ;CHECK-LABEL: stack_fold_rcpps_int 1355 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1356 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1357 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1358 ret <4 x float> %2 1359 } 1360 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1361 1362 ; TODO stack_fold_rcpps_ymm 1363 1364 define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) { 1365 ;CHECK-LABEL: stack_fold_rcpps_ymm_int 1366 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1367 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1368 %2 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) 1369 ret <8 x float> %2 1370 } 1371 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 1372 1373 ; TODO stack_fold_rcpss 1374 1375 define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0) { 1376 ;CHECK-LABEL: stack_fold_rcpss_int 1377 ;CHECK: vrcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1378 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1379 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) 1380 ret <4 x float> %2 1381 } 1382 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1383 1384 
define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_roundpd
;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_roundpd_ymm
;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone

define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_roundps
;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_roundps_ymm
;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone

define double @stack_fold_roundsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_roundsd
;CHECK: vroundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.floor.f64(double %a0)
  ret double %2
}
declare double @llvm.floor.f64(double) nounwind readnone

; TODO stack_fold_roundsd_int
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define float @stack_fold_roundss(float %a0) optsize {
;CHECK-LABEL: stack_fold_roundss
;CHECK: vroundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.floor.f32(float %a0)
  ret float %2
}
declare float @llvm.floor.f32(float) nounwind readnone

; TODO stack_fold_roundss_int
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

; TODO stack_fold_rsqrtps

define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_rsqrtps_int
;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rsqrtps_ymm

define <8 x float> @stack_fold_rsqrtps_ymm_int(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_rsqrtps_ymm_int
;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone

; TODO stack_fold_rsqrtss

define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_rsqrtss_int
;CHECK: vrsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_shufpd
;CHECK: vshufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
  ret <2 x double> %2
}

define <4 x double> @stack_fold_shufpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_shufpd_ymm
;CHECK: vshufpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  ret <4 x double> %2
}

define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_shufps
;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
  ret <4 x float> %2
}

define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_shufps_ymm
;CHECK: vshufps $148, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 13, i32 14>
  ret <8 x float> %2
}

define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_sqrtpd
;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_sqrtpd_ymm
;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_sqrtps
;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_sqrtps_ymm
;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

define double @stack_fold_sqrtsd(double %a0) {
;CHECK-LABEL: stack_fold_sqrtsd
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.sqrt.f64(double %a0)
  ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone

define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_sqrtsd_int
;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

define float @stack_fold_sqrtss(float %a0) {
;CHECK-LABEL: stack_fold_sqrtss
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.sqrt.f32(float %a0)
  ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone
define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_sqrtss_int
;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_subpd
;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_subpd_ymm
;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <4 x double> %a0, %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_subps
;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <4 x float> %a0, %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_subps_ymm
;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <8 x float> %a0, %a1
  ret <8 x float> %2
}

define double @stack_fold_subsd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_subsd
;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_subsd_int
;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_subss(float %a0, float %a1) {
;CHECK-LABEL: stack_fold_subss
;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_subss_int
;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_testpd
;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_testpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_testpd_ymm
;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @stack_fold_testps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_testps
;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @stack_fold_testps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_testps_ymm
;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @stack_fold_ucomisd(double %a0, double %a1) {
;CHECK-LABEL: stack_fold_ucomisd
;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq double %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_ucomisd_int
;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_ucomiss(float %a0, float %a1) {
;CHECK-LABEL: stack_fold_ucomiss
;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq float %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_ucomiss_int
;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_unpckhpd
;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x double> @stack_fold_unpckhpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_unpckhpd_ymm
;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; fadd forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_unpckhps
;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <8 x float> @stack_fold_unpckhps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_unpckhps_ymm
;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ; fadd forces execution domain
  %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %3
}

define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_unpcklpd
;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x double> @stack_fold_unpcklpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_unpcklpd_ymm
;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; fadd forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_unpcklps
;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <8 x float> @stack_fold_unpcklps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_unpcklps_ymm
;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ; fadd forces execution domain
  %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %3
}

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd
;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_xorpd_ymm
;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x double> %a0 to <4 x i64>
  %3 = bitcast <4 x double> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <4 x double>
  ; fadd forces execution domain
  %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %6
}

define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_xorps
;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_xorps_ymm
;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <8 x float> %a0 to <4 x i64>
  %3 = bitcast <8 x float> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <8 x float>
  ; fadd forces execution domain
  %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %6
}