; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
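;
; The asm clobbers every xmm register except the ones holding the live
; arguments (plus one for the "=x" result), so the register allocator has to
; spill those arguments across the "nop" and reload them afterwards; the CHECK
; lines then verify that the reload is folded into the tested instruction as a
; memory operand instead of being reloaded into a register first.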

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd_ymm
  ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x double> %a0, %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps_ymm
  ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <8 x float> %a0, %a1
  ret <8 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <2 x double> %a0, i32 0
  %3 = extractelement <2 x double> %a1, i32 0
  %4 = fadd double %2, %3
  %5 = insertelement <2 x double> %a0, double %4, i32 0
  ret <2 x double> %5
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a0, i32 0
  %3 = extractelement <4 x float> %a1, i32 0
  %4 = fadd float %2, %3
  %5 = insertelement <4 x float> %a0, float %4, i32 0
  ret <4 x float> %5
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_addsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd_ymm
  ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_addsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps_ymm
  ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd_ymm
  ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x double> %a0 to <4 x i64>
  %3 = bitcast <4 x double> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %5 = and <4 x i64> %4, %3
  %6 = bitcast <4 x i64> %5 to <4 x double>
  ; fadd forces execution domain
  %7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps_ymm
  ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <8 x float> %a0 to <4 x i64>
  %3 = bitcast <8 x float> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %5 = and <4 x i64> %4, %3
  %6 = bitcast <4 x i64> %5 to <8 x float>
  ; fadd forces execution domain
  %7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 179 %2 = bitcast <2 x double> %a0 to <2 x i64> 180 %3 = bitcast <2 x double> %a1 to <2 x i64> 181 %4 = and <2 x i64> %2, %3 182 %5 = bitcast <2 x i64> %4 to <2 x double> 183 ; fadd forces execution domain 184 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 185 ret <2 x double> %6 186 } 187 188 define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) { 189 ;CHECK-LABEL: stack_fold_andpd_ymm 190 ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 191 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 192 %2 = bitcast <4 x double> %a0 to <4 x i64> 193 %3 = bitcast <4 x double> %a1 to <4 x i64> 194 %4 = and <4 x i64> %2, %3 195 %5 = bitcast <4 x i64> %4 to <4 x double> 196 ; fadd forces execution domain 197 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0> 198 ret <4 x double> %6 199 } 200 201 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) { 202 ;CHECK-LABEL: stack_fold_andps 203 ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 204 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 205 %2 = bitcast <4 x float> %a0 to <2 x i64> 206 %3 = bitcast <4 x float> %a1 to <2 x i64> 207 %4 = and <2 x i64> %2, %3 208 %5 = bitcast <2 x i64> %4 to <4 x float> 209 ; fadd forces execution domain 210 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 211 ret <4 x float> %6 212 } 213 214 define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) { 215 ;CHECK-LABEL: stack_fold_andps_ymm 216 ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 217 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 218 %2 = bitcast <8 x float> %a0 to <4 x i64> 219 %3 = bitcast <8 x float> %a1 to <4 x i64> 220 %4 = and <4 x i64> %2, %3 221 %5 = bitcast <4 x i64> %4 to <8 x float> 222 ; fadd forces execution domain 223 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 224 ret <8 x float> %6 225 } 226 227 define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) { 228 ;CHECK-LABEL: stack_fold_blendpd 229 ;CHECK: vblendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 230 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 231 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1 232 ; fadd forces execution domain 233 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 234 ret <2 x double> %3 235 } 236 237 define <4 x double> @stack_fold_blendpd_ymm(<4 x double> %a0, <4 x double> %a1) { 238 ;CHECK-LABEL: stack_fold_blendpd_ymm 239 ;CHECK: vblendpd $6, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 240 %1 = tail call <2 x i64> asm 
sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 241 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x double> %a0, <4 x double> %a1 242 ; fadd forces execution domain 243 %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> 244 ret <4 x double> %3} 245 246 define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) { 247 ;CHECK-LABEL: stack_fold_blendps 248 ;CHECK: vblendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 249 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 250 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1 251 ; fadd forces execution domain 252 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 253 ret <4 x float> %3 254 } 255 256 define <8 x float> @stack_fold_blendps_ymm(<8 x float> %a0, <8 x float> %a1) { 257 ;CHECK-LABEL: stack_fold_blendps_ymm 258 ;CHECK: vblendps $102, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 259 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 260 %2 = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %a0, <8 x float> %a1 261 ; fadd forces execution domain 262 %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 263 ret <8 x float> %3 264 } 265 266 define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) { 267 ;CHECK-LABEL: stack_fold_blendvpd 268 ;CHECK: vblendvpd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 269 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 270 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0) 271 ret <2 x double> %2 272 } 273 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 274 275 define <4 x double> @stack_fold_blendvpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %c) { 276 ;CHECK-LABEL: stack_fold_blendvpd_ymm 277 ;CHECK: vblendvpd {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 278 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 279 %2 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a1, <4 x double> %c, <4 x double> %a0) 280 ret <4 x double> %2 281 } 282 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone 283 284 define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) { 285 ;CHECK-LABEL: stack_fold_blendvps 286 ;CHECK: vblendvps {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 287 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 288 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0) 289 ret <4 x float> %2 290 } 291 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 292 293 define <8 x float> @stack_fold_blendvps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %c) { 294 ;CHECK-LABEL: stack_fold_blendvps_ymm 295 ;CHECK: vblendvps {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 296 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 297 %2 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a1, <8 x float> %c, <8 x float> %a0) 298 ret <8 x float> %2 299 } 300 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone 301 302 define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) { 303 ;CHECK-LABEL: stack_fold_cmppd 304 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 305 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 306 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0) 307 ret <2 x double> %2 308 } 309 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone 310 311 define <4 x double> @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) { 312 ;CHECK-LABEL: stack_fold_cmppd_ymm 313 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 314 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 315 %2 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0) 316 ret <4 x double> %2 317 } 318 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 319 320 define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) { 321 ;CHECK-LABEL: stack_fold_cmpps 322 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 323 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 324 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0) 325 ret <4 x float> %2 326 } 327 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 328 329 define <8 x float> @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) { 330 ;CHECK-LABEL: stack_fold_cmpps_ymm 331 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 332 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 333 %2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) 334 ret <8 x float> %2 335 } 336 declare <8 x float> 

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: vcomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: vcomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
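; The conversion tests below keep a single input live across the asm, so their
; clobber lists start at xmm1; the cvtsi tests instead clobber the general
; purpose registers, since their input arrives in a GPR.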
define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}
define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_int
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %cvt
}

define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x double>
  ret <4 x double> %2
}

define <4 x double> @stack_fold_cvtdq2pd_ymm_int(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm_int
  ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %cvt = sitofp <4 x i32> %a0 to <4 x double>
  ret <4 x double> %cvt
}

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <8 x float> @stack_fold_cvtdq2ps_ymm(<8 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps_ymm
  ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <8 x i32> %a0 to <8 x float>
  ret <8 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: vcvtpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvtpd2dq_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq_ymm
  ;CHECK: vcvtpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: vcvtpd2psx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps_ymm
  ;CHECK: vcvtpd2psy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <4 x double> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x float> @stack_fold_cvtph2ps(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_cvtph2ps
  ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly

define <8 x float> @stack_fold_cvtph2ps_ymm(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_cvtph2ps_ymm
  ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <8 x i32> @stack_fold_cvtps2dq_ymm(<8 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq_ymm
  ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 510 %2 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) 511 ret <8 x i32> %2 512 } 513 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone 514 515 define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) { 516 ;CHECK-LABEL: stack_fold_cvtps2pd 517 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 518 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 519 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1> 520 %3 = fpext <2 x float> %2 to <2 x double> 521 ret <2 x double> %3 522 } 523 524 define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) { 525 ;CHECK-LABEL: stack_fold_cvtps2pd_int 526 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 527 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 528 %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1> 529 %cvtps2pd = fpext <2 x float> %2 to <2 x double> 530 ret <2 x double> %cvtps2pd 531 } 532 533 define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) { 534 ;CHECK-LABEL: stack_fold_cvtps2pd_ymm 535 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 536 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 537 %2 = fpext <4 x float> %a0 to <4 x double> 538 ret <4 x double> %2 539 } 540 541 define <4 x double> @stack_fold_cvtps2pd_ymm_int(<4 x float> %a0) { 542 ;CHECK-LABEL: stack_fold_cvtps2pd_ymm_int 543 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 544 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 545 %cvtps2pd = fpext <4 x float> %a0 to <4 x double> 546 ret <4 x double> %cvtps2pd 547 } 548 549 define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) { 550 ;CHECK-LABEL: stack_fold_cvtps2ph_ymm 551 ;CHECK: vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill 552 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) 553 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 554 ret <8 x i16> %1 555 } 556 declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly 557 558 ; TODO stack_fold_cvtsd2si 559 560 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) { 561 ;CHECK-LABEL: stack_fold_cvtsd2si_int 562 ;CHECK: vcvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 563 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 564 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) 565 ret i32 %2 566 } 567 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) 

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK: vcvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  %3 = insertelement <2 x double> zeroinitializer, double %2, i64 0
  ret <2 x double> %3
}

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: vcvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  %3 = insertelement <2 x double> zeroinitializer, double %2, i64 0
  ret <2 x double> %3
}

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: vcvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  %3 = insertelement <4 x float> zeroinitializer, float %2, i64 0
  ret <4 x float> %3
}

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

; TODO: This fold shouldn't require optsize. Not folding doesn't prevent reading an undef register since the registers are a mix of XMM and GPR.
define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: vcvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  %3 = insertelement <4 x float> zeroinitializer, float %2, i64 0
  ret <4 x float> %3
}

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: vcvttpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq_ymm(<4 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq_ymm
  ;CHECK: vcvttpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x double> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i32> @stack_fold_cvttps2dq_ymm(<8 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq_ymm
  ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <8 x float> %a0 to <8 x i32>
  ret <8 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd_ymm
  ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x double> %a0, %a1
  ret <4 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps_ymm
  ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <8 x float> %a0, %a1
  ret <8 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <2 x double> %a0, i32 0
  %3 = extractelement <2 x double> %a1, i32 0
  %4 = fdiv double %2, %3
  %5 = insertelement <2 x double> %a0, double %4, i32 0
  ret <2 x double> %5
}

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = extractelement <4 x float> %a0, i32 0
  %3 = extractelement <4 x float> %a1, i32 0
  %4 = fdiv float %2, %3
  %5 = insertelement <4 x float> %a0, float %4, i32 0
  ret <4 x float> %5
}

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: vdppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @stack_fold_dpps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps_ymm
  ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <4 x float> @stack_fold_extractf128(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_extractf128
  ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
  %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ret <4 x float> %1
}

define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: vextractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  ; fadd forces execution domain
  %1 = fadd <4 x float> %a0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %2 = extractelement <4 x float> %1, i32 1
  %3 = bitcast float %2 to i32
  %4 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %3
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd_ymm
  ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps
  ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps_ymm
  ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_hsubpd
  ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
941 define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) { 942 ;CHECK-LABEL: stack_fold_hsubpd_ymm 943 ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 944 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 945 %2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) 946 ret <4 x double> %2 947 } 948 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 949 950 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 951 ;CHECK-LABEL: stack_fold_hsubps 952 ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 953 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 954 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 955 ret <4 x float> %2 956 } 957 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 958 959 define <8 x float> @stack_fold_hsubps_ymm(<8 x float> %a0, <8 x float> %a1) { 960 ;CHECK-LABEL: stack_fold_hsubps_ymm 961 ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 962 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 963 %2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) 964 ret <8 x float> %2 965 } 966 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 967 968 define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) { 969 ;CHECK-LABEL: stack_fold_insertf128 970 ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 971 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 972 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 973 ret <8 x float> %2 974 } 975 976 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { 977 ;CHECK-LABEL: stack_fold_insertps 978 ;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 979 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] 980 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 981 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 982 ret <4 x float> %2 983 } 984 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 985 986 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 { 987 ;CHECK-LABEL: stack_fold_maxpd 988 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 989 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 990 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 991 ret <2 x double> %2 992 } 993 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 994 995 define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { 996 ;CHECK-LABEL: stack_fold_maxpd_commutable 997 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 998 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 999 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 1000 ret <2 x double> %2 1001 } 1002 1003 define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 { 1004 ;CHECK-LABEL: stack_fold_maxpd_ymm 1005 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1006 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1007 %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) 1008 ret <4 x double> %2 1009 } 1010 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 1011 1012 define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 { 1013 ;CHECK-LABEL: stack_fold_maxpd_ymm_commutable 1014 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1015 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1016 %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) 1017 ret <4 x double> %2 1018 } 1019 1020 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 { 1021 ;CHECK-LABEL: stack_fold_maxps 1022 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1023 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1024 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1025 ret <4 x float> %2 1026 } 1027 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1028 1029 define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { 1030 ;CHECK-LABEL: stack_fold_maxps_commutable 1031 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1032 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1033 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1034 ret <4 x float> %2 1035 } 1036 1037 define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 { 1038 ;CHECK-LABEL: stack_fold_maxps_ymm 1039 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1040 %1 = 
tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1041 %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) 1042 ret <8 x float> %2 1043 } 1044 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 1045 1046 define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 { 1047 ;CHECK-LABEL: stack_fold_maxps_ymm_commutable 1048 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1049 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1050 %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) 1051 ret <8 x float> %2 1052 } 1053 1054 define double @stack_fold_maxsd(double %a0, double %a1) #0 { 1055 ;CHECK-LABEL: stack_fold_maxsd 1056 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1057 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1058 %2 = fcmp ogt double %a0, %a1 1059 %3 = select i1 %2, double %a0, double %a1 1060 ret double %3 1061 } 1062 1063 define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 { 1064 ;CHECK-LABEL: stack_fold_maxsd_commutable 1065 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1066 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1067 %2 = fcmp ogt double %a0, %a1 1068 %3 = select i1 %2, double %a0, double %a1 1069 ret double %3 1070 } 1071 1072 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 { 1073 ;CHECK-LABEL: stack_fold_maxsd_int 1074 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1075 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1076 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 1077 ret <2 x double> %2 1078 } 1079 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 1080 1081 define float @stack_fold_maxss(float %a0, float %a1) #0 { 1082 ;CHECK-LABEL: stack_fold_maxss 1083 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1084 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1085 %2 = fcmp ogt float %a0, %a1 1086 %3 = select i1 %2, float %a0, float %a1 1087 ret float %3 1088 } 1089 1090 define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 { 1091 ;CHECK-LABEL: stack_fold_maxss_commutable 1092 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1093 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1094 %2 = fcmp ogt float %a0, %a1 1095 %3 = select i1 %2, float %a0, float %a1 1096 ret float %3 1097 } 1098 1099 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 { 1100 ;CHECK-LABEL: stack_fold_maxss_int 1101 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1102 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1103 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1104 ret <4 x float> %2 1105 } 1106 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1107 1108 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 { 1109 ;CHECK-LABEL: stack_fold_minpd 1110 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1111 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1112 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 1113 ret <2 x double> %2 1114 } 1115 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 1116 1117 define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { 1118 ;CHECK-LABEL: stack_fold_minpd_commutable 1119 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1120 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1121 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 1122 ret <2 x double> %2 1123 } 1124 1125 define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 { 1126 ;CHECK-LABEL: stack_fold_minpd_ymm 1127 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1128 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1129 %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) 1130 ret <4 x double> %2 1131 } 1132 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 1133 1134 define <4 x double> @stack_fold_minpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 { 1135 ;CHECK-LABEL: stack_fold_minpd_ymm_commutable 1136 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1137 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1138 %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) 1139 ret <4 x double> %2 1140 } 1141 1142 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 { 1143 ;CHECK-LABEL: stack_fold_minps 1144 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1145 %1 = tail call <2 x i64> 
asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1146 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1147 ret <4 x float> %2 1148 } 1149 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1150 1151 define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { 1152 ;CHECK-LABEL: stack_fold_minps_commutable 1153 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1154 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1155 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1156 ret <4 x float> %2 1157 } 1158 1159 define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 { 1160 ;CHECK-LABEL: stack_fold_minps_ymm 1161 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1162 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1163 %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) 1164 ret <8 x float> %2 1165 } 1166 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 1167 1168 define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 { 1169 ;CHECK-LABEL: stack_fold_minps_ymm_commutable 1170 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1171 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1172 %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) 1173 ret <8 x float> %2 1174 } 1175 1176 define double @stack_fold_minsd(double %a0, double %a1) #0 { 1177 ;CHECK-LABEL: stack_fold_minsd 1178 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1179 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1180 %2 = fcmp olt double %a0, %a1 1181 %3 = select i1 %2, double %a0, double %a1 1182 ret double %3 1183 } 1184 1185 define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 { 1186 ;CHECK-LABEL: stack_fold_minsd_commutable 1187 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1188 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1189 %2 = fcmp olt double %a0, %a1 1190 %3 = select i1 %2, double %a0, double %a1 1191 ret double %3 1192 } 1193 1194 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 1195 ;CHECK-LABEL: stack_fold_minsd_int 1196 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1197 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1198 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 1199 ret <2 x double> %2 1200 } 1201 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 1202 1203 define float @stack_fold_minss(float %a0, float %a1) #0 { 1204 ;CHECK-LABEL: stack_fold_minss 1205 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1206 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1207 %2 = fcmp olt float %a0, %a1 1208 %3 = select i1 %2, float %a0, float %a1 1209 ret float %3 1210 } 1211 1212 define float @stack_fold_minss_commutable(float %a0, float %a1) #1 { 1213 ;CHECK-LABEL: stack_fold_minss_commutable 1214 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1215 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1216 %2 = fcmp olt float %a0, %a1 1217 %3 = select i1 %2, float %a0, float %a1 1218 ret float %3 1219 } 1220 1221 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 { 1222 ;CHECK-LABEL: stack_fold_minss_int 1223 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1224 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1225 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1226 ret <4 x float> %2 1227 } 1228 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1229 1230 define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 1231 ;CHECK-LABEL: stack_fold_movddup 1232 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1233 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1234 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1235 ret <2 x double> %2 1236 } 1237 1238 define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) { 1239 ;CHECK-LABEL: stack_fold_movddup_ymm 1240 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1241 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1242 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1243 ret <4 x double> %2 1244 } 1245 1246 ; TODO stack_fold_movhpd (load / store) 1247 ; TODO stack_fold_movhps (load / store) 1248 1249 ; TODO stack_fold_movlpd (load / store) 1250 ; TODO stack_fold_movlps (load / store) 1251 1252 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 1253 ;CHECK-LABEL: stack_fold_movshdup 1254 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1255 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1256 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 1257 ret <4 x float> %2 1258 } 1259 1260 define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) { 1261 ;CHECK-LABEL: stack_fold_movshdup_ymm 1262 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1263 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1264 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1265 ret <8 x float> %2 1266 } 1267 1268 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 1269 ;CHECK-LABEL: stack_fold_movsldup 1270 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1271 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1272 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1273 ret <4 x float> %2 1274 } 1275 1276 define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) { 1277 ;CHECK-LABEL: stack_fold_movsldup_ymm 1278 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1279 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1280 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1281 ret <8 x float> %2 1282 } 1283 1284 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 1285 ;CHECK-LABEL: stack_fold_mulpd 1286 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1287 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1288 %2 = fmul <2 x double> %a0, %a1 1289 ret <2 x double> %2 1290 } 1291 1292 define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1293 ;CHECK-LABEL: stack_fold_mulpd_ymm 1294 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1295 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1296 %2 = fmul <4 x double> %a0, %a1 1297 ret <4 x double> %2 1298 } 1299 1300 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 1301 ;CHECK-LABEL: stack_fold_mulps 1302 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1303 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1304 %2 = fmul <4 x float> %a0, %a1 1305 ret <4 x float> %2 1306 } 1307 1308 define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) { 1309 ;CHECK-LABEL: stack_fold_mulps_ymm 1310 ;CHECK: vmulps 
{{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1311 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1312 %2 = fmul <8 x float> %a0, %a1 1313 ret <8 x float> %2 1314 } 1315 1316 define double @stack_fold_mulsd(double %a0, double %a1) { 1317 ;CHECK-LABEL: stack_fold_mulsd 1318 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1319 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1320 %2 = fmul double %a0, %a1 1321 ret double %2 1322 } 1323 1324 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 1325 ;CHECK-LABEL: stack_fold_mulsd_int 1326 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1327 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1328 %2 = extractelement <2 x double> %a0, i32 0 1329 %3 = extractelement <2 x double> %a1, i32 0 1330 %4 = fmul double %2, %3 1331 %5 = insertelement <2 x double> %a0, double %4, i32 0 1332 ret <2 x double> %5 1333 } 1334 1335 define float @stack_fold_mulss(float %a0, float %a1) { 1336 ;CHECK-LABEL: stack_fold_mulss 1337 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1338 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1339 %2 = fmul float %a0, %a1 1340 ret float %2 1341 } 1342 1343 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 1344 ;CHECK-LABEL: stack_fold_mulss_int 1345 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1346 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1347 %2 = extractelement <4 x float> %a0, i32 0 1348 %3 = extractelement <4 x float> %a1, i32 0 1349 %4 = fmul float %2, %3 1350 %5 = insertelement <4 x float> %a0, float %4, i32 0 1351 ret <4 x float> %5 1352 } 1353 1354 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 1355 ;CHECK-LABEL: stack_fold_orpd 1356 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1357 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1358 %2 = bitcast <2 x double> %a0 to <2 x i64> 1359 %3 = bitcast <2 x double> %a1 to <2 x i64> 1360 %4 = or <2 x i64> %2, %3 1361 %5 = bitcast <2 x i64> %4 to <2 x double> 1362 ; fadd forces execution domain 1363 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1364 ret <2 x double> %6 1365 } 1366 1367 define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1368 ;CHECK-LABEL: stack_fold_orpd_ymm 1369 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1370 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1371 %2 = bitcast <4 x double> %a0 to <4 x i64> 1372 %3 = bitcast <4 x double> %a1 to <4 x i64> 1373 %4 = or <4 x i64> %2, %3 1374 %5 = bitcast <4 x i64> %4 to <4 x double> 1375 ; fadd forces execution domain 1376 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0> 1377 ret <4 x double> %6 1378 } 1379 1380 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 1381 ;CHECK-LABEL: stack_fold_orps 1382 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1383 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1384 %2 = bitcast <4 x float> %a0 to <2 x i64> 1385 %3 = bitcast <4 x float> %a1 to <2 x i64> 1386 %4 = or <2 x i64> %2, %3 1387 %5 = bitcast <2 x i64> %4 to <4 x float> 1388 ; fadd forces execution domain 1389 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 1390 ret <4 x float> %6 1391 } 1392 1393 define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) { 1394 ;CHECK-LABEL: stack_fold_orps_ymm 1395 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1396 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1397 %2 = bitcast <8 x float> %a0 to <4 x i64> 1398 %3 = bitcast <8 x float> %a1 to <4 x i64> 1399 %4 = or <4 x i64> %2, %3 1400 %5 = bitcast <4 x i64> %4 to <8 x float> 1401 ; fadd forces execution domain 1402 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 1403 ret <8 x float> %6 1404 } 1405 1406 define <8 x float> @stack_fold_perm2f128(<8 x float> %a0, <8 x float> %a1) { 1407 ;CHECK-LABEL: stack_fold_perm2f128 1408 ;CHECK: vperm2f128 $33, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1409 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1410 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1411 ret <8 x float> %2 1412 } 1413 1414 define <2 x double> @stack_fold_permilpd(<2 x double> %a0) { 1415 ;CHECK-LABEL: stack_fold_permilpd 1416 ;CHECK: vpermilpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1417 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1418 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1419 ret <2 x double> %2 1420 } 1421 1422 define <4 x double> @stack_fold_permilpd_ymm(<4 x double> %a0) { 1423 ;CHECK-LABEL: stack_fold_permilpd_ymm 1424 ;CHECK: vpermilpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1425 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1426 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x 
i32> <i32 1, i32 0, i32 3, i32 2> 1427 ret <4 x double> %2 1428 } 1429 1430 define <2 x double> @stack_fold_permilpdvar(<2 x double> %a0, <2 x i64> %a1) { 1431 ;CHECK-LABEL: stack_fold_permilpdvar 1432 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1433 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1434 %2 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) 1435 ret <2 x double> %2 1436 } 1437 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 1438 1439 define <4 x double> @stack_fold_permilpdvar_ymm(<4 x double> %a0, <4 x i64> %a1) { 1440 ;CHECK-LABEL: stack_fold_permilpdvar_ymm 1441 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1442 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1443 %2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) 1444 ret <4 x double> %2 1445 } 1446 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 1447 1448 define <4 x float> @stack_fold_permilps(<4 x float> %a0) { 1449 ;CHECK-LABEL: stack_fold_permilps 1450 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1451 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1452 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1453 ret <4 x float> %2 1454 } 1455 1456 define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) { 1457 ;CHECK-LABEL: stack_fold_permilps_ymm 1458 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1459 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1460 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1461 ret <8 x float> %2 1462 } 1463 1464 define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) { 1465 ;CHECK-LABEL: stack_fold_permilpsvar 1466 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1467 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1468 %2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) 1469 ret <4 x float> %2 1470 } 1471 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 1472 1473 define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) { 1474 ;CHECK-LABEL: stack_fold_permilpsvar_ymm 1475 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1476 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1477 %2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) 1478 ret <8 x float> %2 1479 } 1480 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 1481 1482 ; TODO stack_fold_rcpps 1483 1484 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 1485 ;CHECK-LABEL: stack_fold_rcpps_int 1486 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1487 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1488 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1489 ret <4 x float> %2 1490 } 1491 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1492 1493 ; TODO stack_fold_rcpps_ymm 1494 1495 define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) { 1496 ;CHECK-LABEL: stack_fold_rcpps_ymm_int 1497 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1498 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1499 %2 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) 1500 ret <8 x float> %2 1501 } 1502 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 1503 1504 ; TODO stack_fold_rcpss 1505 ; TODO stack_fold_rcpss_int 1506 1507 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) { 1508 ;CHECK-LABEL: stack_fold_roundpd 1509 ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1510 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1511 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) 1512 ret <2 x double> %2 1513 } 1514 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 1515 1516 define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) { 1517 ;CHECK-LABEL: stack_fold_roundpd_ymm 1518 ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1519 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1520 %2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) 1521 ret <4 x double> %2 1522 } 1523 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 1524 1525 define <4 x float> @stack_fold_roundps(<4 x float> %a0) { 1526 ;CHECK-LABEL: stack_fold_roundps 1527 ;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1528 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1529 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) 1530 ret <4 x float> %2 1531 } 1532 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 1533 1534 define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) { 1535 ;CHECK-LABEL: 

define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_roundpd
;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_roundpd_ymm
;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
  ret <4 x double> %2
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone

define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_roundps
;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_roundps_ymm
;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone

define double @stack_fold_roundsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_roundsd
;CHECK: vroundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call double @llvm.floor.f64(double %a0)
  ret double %2
}
declare double @llvm.floor.f64(double) nounwind readnone

define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
;CHECK-LABEL: stack_fold_roundsd_int
;CHECK: vroundsd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define float @stack_fold_roundss(float %a0) optsize {
;CHECK-LABEL: stack_fold_roundss
;CHECK: vroundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.floor.f32(float %a0)
  ret float %2
}
declare float @llvm.floor.f32(float) nounwind readnone

define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
;CHECK-LABEL: stack_fold_roundss_int
;CHECK: vroundss $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

; TODO stack_fold_rsqrtps

define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_rsqrtps_int
;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

; TODO stack_fold_rsqrtps_ymm

define <8 x float> @stack_fold_rsqrtps_ymm_int(<8 x float> %a0) {
;CHECK-LABEL: stack_fold_rsqrtps_ymm_int
;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone

; TODO stack_fold_rsqrtss
; TODO stack_fold_rsqrtss_int
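
; The same hedged sketch shape for the rsqrtss_int TODO above: a placeholder
; "_sketch" function with no check lines, since the folding of the
; partial-update vrsqrtss reload is unverified here.
define <4 x float> @stack_fold_rsqrtss_int_sketch(<4 x float> %a0) {
  ; Clobber every xmm register so %a0 must be spilled around the nop.
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone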
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1644 %2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0) 1645 ret <2 x double> %2 1646 } 1647 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) 1648 1649 define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) { 1650 ;CHECK-LABEL: stack_fold_sqrtpd_ymm 1651 ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1652 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1653 %2 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0) 1654 ret <4 x double> %2 1655 } 1656 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) 1657 1658 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) { 1659 ;CHECK-LABEL: stack_fold_sqrtps 1660 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1661 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1662 %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0) 1663 ret <4 x float> %2 1664 } 1665 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) 1666 1667 define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) { 1668 ;CHECK-LABEL: stack_fold_sqrtps_ymm 1669 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1670 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1671 %2 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %a0) 1672 ret <8 x float> %2 1673 } 1674 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) 1675 1676 define double @stack_fold_sqrtsd(double %a0) optsize { 1677 ;CHECK-LABEL: stack_fold_sqrtsd 1678 ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1679 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1680 %2 = call double @llvm.sqrt.f64(double %a0) 1681 ret double %2 1682 } 1683 declare double @llvm.sqrt.f64(double) nounwind readnone 1684 1685 ; TODO stack_fold_sqrtsd_int 1686 1687 define float @stack_fold_sqrtss(float %a0) optsize { 1688 ;CHECK-LABEL: stack_fold_sqrtss 1689 ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1690 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1691 %2 = call float @llvm.sqrt.f32(float %a0) 1692 ret float %2 1693 } 1694 declare float @llvm.sqrt.f32(float) nounwind readnone 1695 1696 ; TODO stack_fold_sqrtss_int 1697 1698 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) { 1699 ;CHECK-LABEL: stack_fold_subpd 1700 ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1701 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1702 %2 = fsub <2 x double> %a0, %a1 1703 ret 
<2 x double> %2 1704 } 1705 1706 define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1707 ;CHECK-LABEL: stack_fold_subpd_ymm 1708 ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1709 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1710 %2 = fsub <4 x double> %a0, %a1 1711 ret <4 x double> %2 1712 } 1713 1714 define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) { 1715 ;CHECK-LABEL: stack_fold_subps 1716 ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1717 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1718 %2 = fsub <4 x float> %a0, %a1 1719 ret <4 x float> %2 1720 } 1721 1722 define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) { 1723 ;CHECK-LABEL: stack_fold_subps_ymm 1724 ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1725 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1726 %2 = fsub <8 x float> %a0, %a1 1727 ret <8 x float> %2 1728 } 1729 1730 define double @stack_fold_subsd(double %a0, double %a1) { 1731 ;CHECK-LABEL: stack_fold_subsd 1732 ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1733 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1734 %2 = fsub double %a0, %a1 1735 ret double %2 1736 } 1737 1738 define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) { 1739 ;CHECK-LABEL: stack_fold_subsd_int 1740 ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1741 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1742 %2 = extractelement <2 x double> %a0, i32 0 1743 %3 = extractelement <2 x double> %a1, i32 0 1744 %4 = fsub double %2, %3 1745 %5 = insertelement <2 x double> %a0, double %4, i32 0 1746 ret <2 x double> %5 1747 } 1748 1749 define float @stack_fold_subss(float %a0, float %a1) { 1750 ;CHECK-LABEL: stack_fold_subss 1751 ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1753 %2 = fsub float %a0, %a1 1754 ret float %2 1755 } 1756 1757 define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) { 1758 ;CHECK-LABEL: stack_fold_subss_int 1759 ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1760 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1761 %2 = extractelement <4 x float> %a0, i32 0 1762 %3 = extractelement <4 
x float> %a1, i32 0 1763 %4 = fsub float %2, %3 1764 %5 = insertelement <4 x float> %a0, float %4, i32 0 1765 ret <4 x float> %5 1766 } 1767 1768 define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) { 1769 ;CHECK-LABEL: stack_fold_testpd 1770 ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1771 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1772 %2 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) 1773 ret i32 %2 1774 } 1775 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 1776 1777 define i32 @stack_fold_testpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1778 ;CHECK-LABEL: stack_fold_testpd_ymm 1779 ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1780 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1781 %2 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) 1782 ret i32 %2 1783 } 1784 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 1785 1786 define i32 @stack_fold_testps(<4 x float> %a0, <4 x float> %a1) { 1787 ;CHECK-LABEL: stack_fold_testps 1788 ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1789 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1790 %2 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) 1791 ret i32 %2 1792 } 1793 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 1794 1795 define i32 @stack_fold_testps_ymm(<8 x float> %a0, <8 x float> %a1) { 1796 ;CHECK-LABEL: stack_fold_testps_ymm 1797 ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1798 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1799 %2 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) 1800 ret i32 %2 1801 } 1802 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone 1803 1804 define i32 @stack_fold_ucomisd(double %a0, double %a1) { 1805 ;CHECK-LABEL: stack_fold_ucomisd 1806 ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1807 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1808 %2 = fcmp ueq double %a0, %a1 1809 %3 = select i1 %2, i32 1, i32 -1 1810 ret i32 %3 1811 } 1812 1813 define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) { 1814 ;CHECK-LABEL: stack_fold_ucomisd_int 1815 ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1816 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1817 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 1818 ret i32 %2 1819 } 1820 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind 
readnone 1821 1822 define i32 @stack_fold_ucomiss(float %a0, float %a1) { 1823 ;CHECK-LABEL: stack_fold_ucomiss 1824 ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1825 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1826 %2 = fcmp ueq float %a0, %a1 1827 %3 = select i1 %2, i32 1, i32 -1 1828 ret i32 %3 1829 } 1830 1831 define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) { 1832 ;CHECK-LABEL: stack_fold_ucomiss_int 1833 ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1834 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1835 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 1836 ret i32 %2 1837 } 1838 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 1839 1840 define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) { 1841 ;CHECK-LABEL: stack_fold_unpckhpd 1842 ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1843 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1844 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 1845 ; fadd forces execution domain 1846 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 1847 ret <2 x double> %3 1848 } 1849 1850 define <4 x double> @stack_fold_unpckhpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1851 ;CHECK-LABEL: stack_fold_unpckhpd_ymm 1852 ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1853 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1854 %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 1855 ; fadd forces execution domain 1856 %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> 1857 ret <4 x double> %3 1858 } 1859 1860 define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) { 1861 ;CHECK-LABEL: stack_fold_unpckhps 1862 ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1863 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1864 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1865 ; fadd forces execution domain 1866 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 1867 ret <4 x float> %3 1868 } 1869 1870 define <8 x float> @stack_fold_unpckhps_ymm(<8 x float> %a0, <8 x float> %a1) { 1871 ;CHECK-LABEL: stack_fold_unpckhps_ymm 1872 ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1873 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1874 %2 = shufflevector <8 x float> %a0, <8 x float> 

define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklpd
  ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x double> @stack_fold_unpcklpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklpd_ymm
  ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; fadd forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklps
  ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <8 x float> @stack_fold_unpcklps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklps_ymm
  ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ; fadd forces execution domain
  %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %3
}
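
; LLVM IR has no floating-point xor, so the tests below bitcast to integer
; vectors, xor, and bitcast back; the trailing fadd again pins the result to
; the floating-point domain so the folded instruction is vxorpd/vxorps rather
; than an integer xor.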

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_xorpd
  ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
  ;CHECK-LABEL: stack_fold_xorpd_ymm
  ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x double> %a0 to <4 x i64>
  %3 = bitcast <4 x double> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <4 x double>
  ; fadd forces execution domain
  %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %6
}

define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_xorps
  ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
  ;CHECK-LABEL: stack_fold_xorps_ymm
  ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <8 x float> %a0 to <4 x i64>
  %3 = bitcast <8 x float> %a1 to <4 x i64>
  %4 = xor <4 x i64> %2, %3
  %5 = bitcast <4 x i64> %4 to <8 x float>
  ; fadd forces execution domain
  %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %6
}

attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }
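
; Note: attribute groups #0 ("unsafe-fp-math"="false") and #1
; ("unsafe-fp-math"="true") are presumably referenced by tests earlier in the
; file; none of the functions in this final group use them.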