; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; Codegen test for AVX arithmetic on x86-64 (Darwin triple, corei7-avx, +avx).
;
; Layout of this file:
;   * 256-bit floating-point add/sub/mul/div, each in a register-register
;     form and a "fold" form whose second operand comes from memory (either
;     a constant-pool entry addressed via %rip or an explicit pointer).
;   * Scalar square root via the libm entry points sqrtf/sqrt.
;   * 256-bit integer add/sub/mul, where the expected output works on the
;     two 128-bit halves separately (vextractf128 / %xmm op / vinsertf128).
;   * The llvm.x86.sse.sqrt.ss intrinsic.
;
; The FileCheck patterns are matched in file order against the llc output,
; so the functions below must stay in the same order as their patterns.
;
; NOTE(review): the loads below use the typed-pointer form
; "load <ty>* %p"; newer LLVM releases spell this "load <ty>, <ty>* %p".
; Confirm which LLVM version this test is meant to run against.

; --- 256-bit floating-point add ---------------------------------------------

; Register-register packed double add.
; CHECK: vaddpd
define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

; Packed double add with a constant vector; the constant is expected to be
; used directly as a %rip-relative constant-pool memory operand.
; CHECK: vaddpd LCP{{.*}}(%rip)
define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %add.i
}

; Register-register packed float add.
; CHECK: vaddps
define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <8 x float> %x, %y
  ret <8 x float> %add.i
}

; Packed float add with a constant vector folded from the constant pool.
; CHECK: vaddps LCP{{.*}}(%rip)
define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %add.i
}

; --- 256-bit floating-point sub ---------------------------------------------

; Register-register packed double subtract.
; CHECK: vsubpd
define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %sub.i = fsub <4 x double> %x, %y
  ret <4 x double> %sub.i
}

; Packed double subtract whose subtrahend is loaded (32-byte aligned) from
; the pointer argument; the load is expected to be folded into the
; instruction as a register-indirect memory operand.
; CHECK: vsubpd (%
define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
entry:
  %tmp2 = load <4 x double>* %x, align 32
  %sub.i = fsub <4 x double> %y, %tmp2
  ret <4 x double> %sub.i
}

; Register-register packed float subtract.
; CHECK: vsubps
define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %sub.i = fsub <8 x float> %x, %y
  ret <8 x float> %sub.i
}

; Packed float subtract with the subtrahend folded from a 32-byte aligned
; load through the pointer argument.
; CHECK: vsubps (%
define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
entry:
  %tmp2 = load <8 x float>* %x, align 32
  %sub.i = fsub <8 x float> %y, %tmp2
  ret <8 x float> %sub.i
}

; --- 256-bit floating-point mul ---------------------------------------------

; Register-register packed double multiply.
; CHECK: vmulpd
define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <4 x double> %x, %y
  ret <4 x double> %mul.i
}

; Packed double multiply with a constant vector folded from the constant pool.
; CHECK: vmulpd LCP{{.*}}(%rip)
define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %mul.i
}

; Register-register packed float multiply.
; CHECK: vmulps
define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <8 x float> %x, %y
  ret <8 x float> %mul.i
}

; Packed float multiply with a constant vector folded from the constant pool.
; CHECK: vmulps LCP{{.*}}(%rip)
define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %mul.i
}

; --- 256-bit floating-point div ---------------------------------------------

; Register-register packed double divide.
; CHECK: vdivpd
define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <4 x double> %x, %y
  ret <4 x double> %div.i
}

; Packed double divide by a constant vector folded from the constant pool.
; CHECK: vdivpd LCP{{.*}}(%rip)
define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <4 x double> %div.i
}

; Register-register packed float divide.
; CHECK: vdivps
define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <8 x float> %x, %y
  ret <8 x float> %div.i
}

; Packed float divide by a constant vector folded from the constant pool.
; CHECK: vdivps LCP{{.*}}(%rip)
define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
entry:
  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <8 x float> %div.i
}

; --- Scalar square root -----------------------------------------------------

; A readnone tail call to sqrtf is expected to become a single vsqrtss.
; CHECK: vsqrtss
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone

; A readnone tail call to sqrt is expected to become a single vsqrtsd.
; CHECK: vsqrtsd
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @sqrtf(float) readnone

; --- 256-bit integer add ----------------------------------------------------
;
; For every integer test below the expected output has the same shape: the
; upper 128-bit half of each operand is extracted, the operation is emitted
; twice on %xmm registers, and the halves are recombined with vinsertf128.
; (Presumably because only +avx is enabled and 256-bit integer arithmetic is
; not available at that feature level -- the patterns only show the split.)

; <4 x i64> add.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; <8 x i32> add.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vpaddd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; <16 x i16> add.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vpaddw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; <32 x i8> add.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vpaddb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; --- 256-bit integer sub ----------------------------------------------------

; <4 x i64> subtract.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vpsubq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; <8 x i32> subtract.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vpsubd %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; <16 x i16> subtract.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vpsubw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; <32 x i8> subtract.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vpsubb %xmm
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; --- 256-bit integer mul ----------------------------------------------------

; <8 x i32> multiply.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vpmulld %xmm
; CHECK-NEXT: vinsertf128 $1
define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; <16 x i16> multiply.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vpmullw %xmm
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; <4 x i64> multiply. No single 64-bit element multiply appears in the
; expected output; each 128-bit half is instead built from vpmuludq partial
; products combined with 32-bit shifts (vpsrlq/vpsllq) and vpaddq. The exact
; instruction order below is part of the test, so any scheduling change in
; the backend will require updating this sequence.
; CHECK: vextractf128 $1
; CHECK-NEXT: vextractf128 $1
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

; --- SSE sqrt.ss intrinsic --------------------------------------------------

declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

; Loads a float through an undef pointer in address space 1, places it in
; lane 0 of an otherwise undef vector and feeds that to the sqrt.ss
; intrinsic. The patterns first locate the function by name in the output
; and then require a vsqrtss after it.
define <4 x float> @int_sqrt_ss() {
; CHECK: int_sqrt_ss
; CHECK: vsqrtss
 %x0 = load float addrspace(1)* undef, align 8
 %x1 = insertelement <4 x float> undef, float %x0, i32 0
 %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
 ret <4 x float> %x2
}