; Codegen tests for scalar insertelement/extractelement on vector and mask
; (<N x i1>) types. The file is compiled twice by llc, once with -mcpu=knl and
; once with -mcpu=skx; expectations shared by both compilations use the common
; prefix, CPU-specific expectations use the per-CPU prefix.
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s

; Insert a loaded float into element 1 and an argument float into element 14
; of a <16 x float>.
;CHECK-LABEL: test1:
;CHECK: vinsertps
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
  %rrr = load float, float* %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; Same shape as test1 for <8 x double>: loaded value into element 1, argument
; value into element 6. The two CPUs are expected to differ only in the
; 128-bit insert/extract mnemonic (32x4 vs 64x2).
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test2:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
; SKX-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %rrr = load double, double* %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; Copy element 4 of a <16 x float> into element 1 of the same vector.
;CHECK-LABEL: test3:
;CHECK: vextractf32x4 $1
;CHECK: vinsertf32x4 $0
;CHECK: ret
define <16 x float> @test3(<16 x float> %x) nounwind {
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; Copy element 4 of an <8 x i64> into element 1 of the same vector; the value
; is expected to travel through a general-purpose register (vmovq/vpinsrq).
define <8 x i64> @test4(<8 x i64> %x) nounwind {
; KNL-LABEL: test4:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; KNL-NEXT:    vmovq %xmm1, %rax
; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
; SKX-NEXT:    vmovq %xmm1, %rax
; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; Extract element 3 of a <4 x float> and return its bits as an i32.
;CHECK-LABEL: test5:
;CHECK: vextractps
;CHECK: ret
define i32 @test5(<4 x float> %x) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; Extract element 3 of a <4 x float> and store it; the extract is expected to
; write straight to the destination address.
;CHECK-LABEL: test6:
;CHECK: vextractps {{.*}}, (%rdi)
;CHECK: ret
define void @test6(<4 x float> %x, float* %out) nounwind {
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, float* %out, align 4
  ret void
}

; test7 - test10: extractelement with a variable (non-constant) index; a
; permute on the full-width register is expected.
;CHECK-LABEL: test7
;CHECK: vmovd
;CHECK: vpermps %zmm
;CHECK: ret
define float @test7(<16 x float> %x, i32 %ind) nounwind {
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; Variable-index extract from <8 x double>.
;CHECK-LABEL: test8
;CHECK: vmovq
;CHECK: vpermpd %zmm
;CHECK: ret
define double @test8(<8 x double> %x, i32 %ind) nounwind {
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; Variable-index extract from a 256-bit <8 x float>.
;CHECK-LABEL: test9
;CHECK: vmovd
;CHECK: vpermps %ymm
;CHECK: ret
define float @test9(<8 x float> %x, i32 %ind) nounwind {
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; Variable-index extract from <16 x i32>; the result is moved to %eax.
;CHECK-LABEL: test10
;CHECK: vmovd
;CHECK: vpermd %zmm
;CHECK: vmovd %xmm0, %eax
;CHECK: ret
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; Branch on bit 4 of a <16 x i1> compare result. The expected shift pair
; (left by 11, right by 15) isolates that bit of the 16-bit mask.
;CHECK-LABEL: test11
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: je
;CHECK: ret
;CHECK: ret
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
  A:
    ret <16 x i32>%b
  B:
   %c = add <16 x i32>%b, %a
   ret <16 x i32>%c
}

; Select between two scalars on bit 0 of a <16 x i1> compare of <16 x i64>
; operands.
;CHECK-LABEL: test12
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: ret

define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {

  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a scalar compare result into bit 0 of a constant <16 x i1> whose only
; false lane is lane 1, then return the mask as an i16.
;CHECK-LABEL: test13
;CHECK: cmpl    %esi, %edi
;CHECK: setb    %al
;CHECK: andl    $1, %eax
;CHECK: kmovw   %eax, %k0
;CHECK: movw    $-4
;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Select on bit 4 of an <8 x i1> compare result. The expected mask shifts are
; word-sized on KNL and byte-sized on SKX.
;CHECK-LABEL: test14
;CHECK: vpcmpgtq
;KNL: kshiftlw $11
;KNL: kshiftrw $15
;KNL: testb
;SKX: kshiftlb $3
;SKX: kshiftrb $7
;SKX: testb
;CHECK: ret

define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {

  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert a loaded i1 into lane 10 of an undef <16 x i1> and return the mask as
; an i16; all other lanes are undef, so no mask-register sequence is expected.
;CHECK-LABEL: test15
;CHECK: movb (%rdi), %al
;CHECK: andb $1, %al
;CHECK: movw $-1, %ax
;CHECK: cmovew
define i16 @test15(i1 *%addr) {
  %x = load i1 , i1 * %addr, align 1
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Insert a loaded i1 into lane 10 of a <16 x i1> built from an i16 argument,
; then return the mask as an i16.
;CHECK-LABEL: test16
;CHECK: movb (%rdi), %al
;CHECK: andw $1, %ax
;CHECK: kmovw
;CHECK: kshiftlw $10
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Same as test16 for an 8-lane mask (lane 4). Word-sized mask instructions are
; expected on KNL and byte-sized ones on SKX.
;CHECK-LABEL: test17
;KNL: movb (%rdi), %al
;KNL: andw $1, %ax
;KNL: kshiftlw $4
;KNL: korw
;SKX: kshiftlb $4
;SKX: korb
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}

; extract_* tests: each function extracts two elements at constant indices,
; returning the first in a register and storing the second through %dst.
; Only the skx compilation carries expectations for these functions.

define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; SKX-LABEL: extract_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i64> %x, i32 1
  %r2 = extractelement <8 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; SKX-LABEL: extract_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i64> %x, i32 1
  %r2 = extractelement <4 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
; SKX-LABEL: extract_v2i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovq %xmm0, %rax
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; SKX-LABEL: extract_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i32> %x, i32 1
  %r2 = extractelement <16 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; SKX-LABEL: extract_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i32> %x, i32 1
  %r2 = extractelement <8 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; SKX-LABEL: extract_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; SKX-LABEL: extract_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i16> %x, i32 1
  %r2 = extractelement <32 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; SKX-LABEL: extract_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i16> %x, i32 1
  %r2 = extractelement <16 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
; SKX-LABEL: extract_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; SKX-LABEL: extract_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <64 x i8> %x, i32 1
  %r2 = extractelement <64 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; SKX-LABEL: extract_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i8> %x, i32 1
  %r2 = extractelement <32 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
; SKX-LABEL: extract_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; insert_* tests: each function inserts a value loaded from %ptr and then the
; argument %y at two constant indices of the vector.

define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
; SKX-LABEL: insert_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
  ret <8 x i64> %r2
}

define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
; SKX-LABEL: insert_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
  ret <4 x i64> %r2
}

; The second insert previously used index 3, which is out of range for a
; two-element vector: the IR result is undefined and the old expectation
; pinned a "vpinsrq $3" whose immediate has no defined lane. The only valid
; lanes are 0 and 1, so %y now goes to lane 0.
; NOTE(review): the expectations below were written by hand and are kept
; deliberately loose (no out-of-range immediate; the memory insert into lane 1
; is the last instruction before the return). Regenerate them with llc for the
; exact sequence.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; SKX-LABEL: insert_v2i64:
; SKX:       ## BB#0:
; SKX-NOT:     vpinsrq $3
; SKX:         vpinsrq $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
; SKX-LABEL: insert_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
  ret <16 x i32> %r2
}

define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
  ret <8 x i32> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

; On KNL the <32 x i16> value is expected to be handled as ymm registers
; (both inserts land in %ymm0); on SKX a single zmm register is expected.
define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
  ret <32 x i16> %r2
}

define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
  ret <16 x i16> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

; Element 50 of <64 x i8> is byte 2 of the fourth 128-bit lane. On KNL the
; expected code updates the upper lane of %ymm1; on SKX it updates lane 3 of
; %zmm0.
define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
  ret <64 x i8> %r2
}

define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
; SKX-LABEL: insert_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
  ret <32 x i8> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

; test_insert_128_* tests: a single insert of an argument value at a constant
; index, expected to be done on one 128-bit lane that is then written back
; into the wider register.

define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
; KNL-LABEL: test_insert_128_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x i64> %x, i64 %y, i32 1
  ret <8 x i64> %r
}

define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
; KNL-LABEL: test_insert_128_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i32> %x, i32 %y, i32 1
  ret <16 x i32> %r
}

define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
; KNL-LABEL: test_insert_128_v8f64:
; KNL:       ## BB#0:
; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm1
; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x double> %x, double %y, i32 1
  ret <8 x double> %r
}

define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
; KNL-LABEL: test_insert_128_v16f32:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps $16, %xmm1, %xmm0, %xmm1
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x float> %x, float %y, i32 1
  ret <16 x float> %r
}

; Element 10 of <16 x i16> is word 2 of the upper 128-bit lane.
define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
; KNL-LABEL: test_insert_128_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i16> %x, i16 %y, i32 10
  ret <16 x i16> %r
}

; Element 20 of <32 x i8> is byte 4 of the upper 128-bit lane.
define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
; KNL-LABEL: test_insert_128_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <32 x i8> %x, i8 %y, i32 20
  ret <32 x i8> %r
}