; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s

; Code-generation test for 512-bit vector arithmetic when targeting the "knl"
; CPU. Each function below holds one small IR pattern; the FileCheck
; directives in front of it name the instruction (and, where it matters, the
; operand form) expected in llc's assembly output.
;
; Naming used by the tests in this file:
;   *fold       - the second operand comes from memory or the constant pool
;                 and is expected to appear as a memory operand.
;   *broadcast  - the second operand is a splat and is expected to use the
;                 {1toN} embedded-broadcast memory form.
;   *mask/maskz - the result is selected against the first source (mask) or
;                 against zero (maskz) and is expected to carry a {%k} write
;                 mask, plus {z} for the zeroing form.

;------------------------------------------------------------------------------
; Floating-point add
;------------------------------------------------------------------------------

; CHECK-LABEL: addpd512
; CHECK: vaddpd
; CHECK: ret
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

; The constant vector is not a splat, so a full-width constant-pool operand
; (no broadcast) is expected.
; CHECK-LABEL: addpd512fold
; CHECK: vaddpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @addpd512fold(<8 x double> %y) {
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

; CHECK-LABEL: addps512
; CHECK: vaddps
; CHECK: ret
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

; CHECK-LABEL: addps512fold
; CHECK: vaddps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @addps512fold(<16 x float> %y) {
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

;------------------------------------------------------------------------------
; Floating-point subtract
;------------------------------------------------------------------------------

; CHECK-LABEL: subpd512
; CHECK: vsubpd
; CHECK: ret
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

; The loaded vector is the subtrahend, so the load is expected to be folded
; into the instruction as a register-indirect memory operand.
; CHECK-LABEL: @subpd512fold
; CHECK: vsubpd (%
; CHECK: ret
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
entry:
  %tmp2 = load <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

; CHECK-LABEL: @subps512
; CHECK: vsubps
; CHECK: ret
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; CHECK-LABEL: subps512fold
; CHECK: vsubps (%
; CHECK: ret
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
entry:
  %tmp2 = load <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

;------------------------------------------------------------------------------
; Multiply
;------------------------------------------------------------------------------

; A <8 x i64> multiply is expected to be expanded into a sequence containing
; at least two vpmuludq instructions.
; CHECK-LABEL: imulq512
; CHECK: vpmuludq
; CHECK: vpmuludq
; CHECK: ret
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
  %z = mul <8 x i64> %x, %y
  ret <8 x i64> %z
}

; CHECK-LABEL: mulpd512
; CHECK: vmulpd
; CHECK: ret
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

; CHECK-LABEL: mulpd512fold
; CHECK: vmulpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @mulpd512fold(<8 x double> %y) {
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

; CHECK-LABEL: mulps512
; CHECK: vmulps
; CHECK: ret
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

; CHECK-LABEL: mulps512fold
; CHECK: vmulps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @mulps512fold(<16 x float> %y) {
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

;------------------------------------------------------------------------------
; Floating-point divide
;------------------------------------------------------------------------------

; CHECK-LABEL: divpd512
; CHECK: vdivpd
; CHECK: ret
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

; CHECK-LABEL: divpd512fold
; CHECK: vdivpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @divpd512fold(<8 x double> %y) {
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

; CHECK-LABEL: divps512
; CHECK: vdivps
; CHECK: ret
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

; CHECK-LABEL: divps512fold
; CHECK: vdivps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @divps512fold(<16 x float> %y) {
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

;------------------------------------------------------------------------------
; Integer add, 64-bit elements
;------------------------------------------------------------------------------

; CHECK-LABEL: vpaddq_test
; CHECK: vpaddq %zmm
; CHECK: ret
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

; CHECK-LABEL: vpaddq_fold_test
; CHECK: vpaddq (%
; CHECK: ret
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
  %tmp = load <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

; Splat constant: a single constant-pool element broadcast to 8 lanes.
; CHECK-LABEL: vpaddq_broadcast_test
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}
; CHECK: ret
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

; Splat built from one scalar load via an insertelement chain: the scalar
; load is expected to be folded as a broadcast memory operand.
; CHECK-LABEL: vpaddq_broadcast2_test
; CHECK: vpaddq (%rdi){1to8}
; CHECK: ret
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
  %tmp = load i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

;------------------------------------------------------------------------------
; Integer add, 32-bit elements (plain, folded, broadcast, masked)
;------------------------------------------------------------------------------

; CHECK-LABEL: vpaddd_test
; CHECK: vpaddd %zmm
; CHECK: ret
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

; CHECK-LABEL: vpaddd_fold_test
; CHECK: vpaddd (%
; CHECK: ret
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
  %tmp = load <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

; CHECK-LABEL: vpaddd_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
; CHECK: ret
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

; select(mask, add, %i): merge-masked add.
; CHECK-LABEL: vpaddd_mask_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; select(mask, add, 0): zero-masked add.
; CHECK-LABEL: vpaddd_maskz_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; CHECK-LABEL: vpaddd_mask_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; CHECK-LABEL: vpaddd_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; CHECK-LABEL: vpaddd_maskz_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; CHECK-LABEL: vpaddd_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

;------------------------------------------------------------------------------
; Integer subtract and 32-bit multiply
;------------------------------------------------------------------------------

; CHECK-LABEL: vpsubq_test
; CHECK: vpsubq %zmm
; CHECK: ret
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

; Both operands are register arguments, so a zmm register operand is required
; here just as in the neighbouring register-register tests.
; CHECK-LABEL: vpsubd_test
; CHECK: vpsubd %zmm
; CHECK: ret
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

; CHECK-LABEL: vpmulld_test
; CHECK: vpmulld %zmm
; CHECK: ret
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

;------------------------------------------------------------------------------
; Square root. The scalar tests also pin the first encoding byte to 0x62,
; using the encoding text that --show-mc-encoding adds to the output.
;------------------------------------------------------------------------------

; CHECK-LABEL: sqrtA
; CHECK: vsqrtss {{.*}} encoding: [0x62
; CHECK: ret
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

; CHECK-LABEL: sqrtB
; CHECK: vsqrtsd {{.*}}## encoding: [0x62
; CHECK: ret
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

; CHECK-LABEL: sqrtC
; CHECK: vsqrtss {{.*}}## encoding: [0x62
; CHECK: ret
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

; CHECK-LABEL: sqrtD
; CHECK: vsqrtps {{.*}}
; CHECK: ret
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

; CHECK-LABEL: sqrtE
; CHECK: vsqrtpd {{.*}}
; CHECK: ret
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

;------------------------------------------------------------------------------
; Splat-constant operands folded as embedded broadcasts
;------------------------------------------------------------------------------

; The mnemonic is part of the pattern so that only a vaddps carrying the
; broadcast operand satisfies it.
; CHECK-LABEL: fadd_broadcast
; CHECK: vaddps LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK: ret
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

; CHECK-LABEL: addq_broadcast
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; CHECK-LABEL: orq_broadcast
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

;------------------------------------------------------------------------------
; Bitwise AND with folded and broadcast memory operands
;------------------------------------------------------------------------------

; CHECK-LABEL: andd512fold
; CHECK: vpandd (%
; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
entry:
  %a = load <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; Splat built with insertelement + zero-mask shufflevector from one scalar
; load: expected to fold as a broadcast memory operand.
; CHECK-LABEL: andqbrst
; CHECK: vpandq (%rdi){1to8}, %zmm
; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
entry:
  %a = load i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64> %d
}