; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s

; test1: write a float loaded from memory into lane 1 and a scalar argument
; into lane 14 of a <16 x float>.
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; KNL-LABEL: test1:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test1:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; SKX-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %loaded = load float, float* %br
  %lane1 = insertelement <16 x float> %x, float %loaded, i32 1
  %lane14 = insertelement <16 x float> %lane1, float %y, i32 14
  ret <16 x float> %lane14
}

; test2: same shape as test1 for <8 x double>: loaded value into lane 1,
; scalar argument into lane 6.
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test2:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
; SKX-NEXT:    vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %loaded = load double, double* %br
  %lane1 = insertelement <8 x double> %x, double %loaded, i32 1
  %lane6 = insertelement <8 x double> %lane1, double %y, i32 6
  ret <8 x double> %lane6
}

; test3: copy lane 4 of a <16 x float> into lane 1 of the same vector.
define <16 x float> @test3(<16 x float> %x) nounwind {
; KNL-LABEL: test3:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test3:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %lane4 = extractelement <16 x float> %x, i32 4
  %result = insertelement <16 x float> %x, float %lane4, i32 1
  ret <16 x float> %result
}

; test4: copy lane 4 of an <8 x i64> into lane 1 of the same vector.
define <8 x i64> @test4(<8 x i64> %x) nounwind {
; KNL-LABEL: test4:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; KNL-NEXT:    vmovq %xmm1, %rax
; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
; SKX-NEXT:    vmovq %xmm1, %rax
; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %lane4 = extractelement <8 x i64> %x, i32 4
  %result = insertelement <8 x i64> %x, i64 %lane4, i32 1
  ret <8 x i64> %result
}

; test5: extract lane 3 of a <4 x float> and return its bit pattern as i32.
define i32 @test5(<4 x float> %x) nounwind {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractps $3, %xmm0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractps $3, %xmm0, %eax
; SKX-NEXT:    retq
  %lane3 = extractelement <4 x float> %x, i32 3
  %bits = bitcast float %lane3 to i32
  ret i32 %bits
}

; test6: extract lane 3 of a <4 x float> and store it through %out.
define void @test6(<4 x float> %x, float* %out) nounwind {
; KNL-LABEL: test6:
; KNL:       ## BB#0:
; KNL-NEXT:    vextractps $3, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test6:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractps $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %lane3 = extractelement <4 x float> %x, i32 3
  store float %lane3, float* %out, align 4
  ret void
}

; test7: extract from a <16 x float> at a run-time (non-constant) index.
define float @test7(<16 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test7:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %elt = extractelement <16 x float> %x, i32 %ind
  ret float %elt
}

; test8: extract from an <8 x double> at a run-time (non-constant) index.
define double @test8(<8 x double> %x, i32 %ind) nounwind {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    movslq %edi, %rax
; KNL-NEXT:    vmovq %rax, %xmm1
; KNL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    movslq %edi, %rax
; SKX-NEXT:    vmovq %rax, %xmm1
; SKX-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT:    retq
  %elt = extractelement <8 x double> %x, i32 %ind
  ret double %elt
}

; test9: extract from an <8 x float> at a run-time (non-constant) index.
define float @test9(<8 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; SKX-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; SKX-NEXT:    retq
  %elt = extractelement <8 x float> %x, i32 %ind
  ret float %elt
}

; test10: extract from a <16 x i32> at a run-time (non-constant) index.
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; KNL-LABEL: test10:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    vmovd %xmm0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovd %edi, %xmm1
; SKX-NEXT:    vpermd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    vmovd %xmm0, %eax
; SKX-NEXT:    retq
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; test11: branch on element 4 of the <16 x i1> result of an unsigned
; less-than compare of two <16 x i32> vectors.
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test11:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftlw $11, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    je LBB10_2
; KNL-NEXT:  ## BB#1: ## %A
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
; KNL-NEXT:  LBB10_2: ## %B
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftlw $11, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    je LBB10_2
; SKX-NEXT:  ## BB#1: ## %A
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
; SKX-NEXT:  LBB10_2: ## %B
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
  A:
    ret <16 x i32>%b
  B:
   %c = add <16 x i32>%b, %a
   ret <16 x i32>%c
}

; test12: select between two scalars on element 0 of a <16 x i1> produced by
; a signed less-than compare of two <16 x i64> vectors.
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test12:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; KNL-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
; KNL-NEXT:    kunpckbw %k0, %k1, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    cmoveq %rsi, %rdi
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test12:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; SKX-NEXT:    vpcmpgtq %zmm1, %zmm3, %k1
; SKX-NEXT:    kunpckbw %k0, %k1, %k0
; SKX-NEXT:    kshiftlw $15, %k0, %k0
; SKX-NEXT:    kshiftrw $15, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    cmoveq %rsi, %rdi
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    retq
  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; test13: insert a scalar compare result into element 0 of a constant
; <16 x i1> and return the mask as i16.
define i16 @test13(i32 %a, i32 %b) {
; KNL-LABEL: test13:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    setb %al
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    movw $-4, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test13:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    setb %al
; SKX-NEXT:    kmovw %eax, %k0
; SKX-NEXT:    movw $-4, %ax
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    korw %k0, %k1, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; test14: select between two scalars on element 4 of an <8 x i1> produced by
; a signed less-than compare of two <8 x i64> vectors.
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test14:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT:    kshiftlw $11, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    cmoveq %rsi, %rdi
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test14:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT:    kshiftlb $3, %k0, %k0
; SKX-NEXT:    kshiftrb $7, %k0, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    cmoveq %rsi, %rdi
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    retq
  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; test15: insert a loaded i1 into element 10 of an undef <16 x i1> and return
; the mask as i16.
define i16 @test15(i1 *%addr) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    movb (%rdi), %al
; KNL-NEXT:    xorl %ecx, %ecx
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    movw $-1, %ax
; KNL-NEXT:    cmovew %cx, %ax
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    movb (%rdi), %al
; SKX-NEXT:    xorl %ecx, %ecx
; SKX-NEXT:    testb %al, %al
; SKX-NEXT:    movw $-1, %ax
; SKX-NEXT:    cmovew %cx, %ax
; SKX-NEXT:    retq
  %x = load i1 , i1 * %addr, align 1
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; test16: insert a loaded i1 into element 10 of a <16 x i1> obtained by
; bitcasting the i16 argument, and return the mask as i16.
define i16 @test16(i1 *%addr, i16 %a) {
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $10, %k0, %k0
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## BB#0:
; SKX-NEXT:    movzbl (%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovd %eax, %k0
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kshiftlw $10, %k0, %k0
; SKX-NEXT:    korw %k0, %k1, %k0
; SKX-NEXT:    kmovw %k0, %eax
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; test17: insert a loaded i1 into element 4 of an <8 x i1> obtained by
; bitcasting the i8 argument, and return the mask as i8.
define i8 @test17(i1 *%addr, i8 %a) {
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $4, %k0, %k0
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## BB#0:
; SKX-NEXT:    movzbl (%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovd %eax, %k0
; SKX-NEXT:    kmovb %esi, %k1
; SKX-NEXT:    kshiftlb $4, %k0, %k0
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}

; extract_v8i64: return element 1 and store element 3 of an <8 x i64>.
define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrq $1, %xmm0, %rax
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i64> %x, i32 1
  %r2 = extractelement <8 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; extract_v4i64: return element 1 and store element 3 of a <4 x i64>.
define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v4i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrq $1, %xmm0, %rax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrq $1, %xmm0, %rax
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i64> %x, i32 1
  %r2 = extractelement <4 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; extract_v2i64: return element 0 and store element 1 of a <2 x i64>.
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v2i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovq %xmm0, %rax
; KNL-NEXT:    vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v2i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vmovq %xmm0, %rax
; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; extract_v16i32: return element 1 and store element 5 of a <16 x i32>.
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i32> %x, i32 1
  %r2 = extractelement <16 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; extract_v8i32: return element 1 and store element 5 of an <8 x i32>.
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i32> %x, i32 1
  %r2 = extractelement <8 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; extract_v4i32: return element 1 and store element 3 of a <4 x i32>.
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrd $1, %xmm0, %eax
; KNL-NEXT:    vpextrd $3, %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrd $1, %xmm0, %eax
; SKX-NEXT:    vpextrd $3, %xmm0, (%rdi)
; SKX-NEXT:    retq
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; extract_v32i16: return element 1 and store element 9 of a <32 x i16>.
define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i16> %x, i32 1
  %r2 = extractelement <32 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; extract_v16i16: return element 1 and store element 9 of a <16 x i16>.
define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i16> %x, i32 1
  %r2 = extractelement <16 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; extract_v8i16: return element 1 and store element 3 of an <8 x i16>.
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrw $1, %xmm0, %eax
; KNL-NEXT:    vpextrw $3, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrw $1, %xmm0, %eax
; SKX-NEXT:    vpextrw $3, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; extract_v64i8: return element 1 and store element 17 of a <64 x i8>.
define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <64 x i8> %x, i32 1
  %r2 = extractelement <64 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; extract_v32i8: return element 1 and store element 17 of a <32 x i8>.
define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <32 x i8> %x, i32 1
  %r2 = extractelement <32 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; extract_v16i8: return element 1 and store element 3 of a <16 x i8>.
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpextrb $1, %xmm0, %eax
; KNL-NEXT:    vpextrb $3, %xmm0, (%rdi)
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: extract_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpextrb $1, %xmm0, %eax
; SKX-NEXT:    vpextrb $3, %xmm0, (%rdi)
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; insert_v8i64: insert a loaded value at element 1 and the scalar argument at
; element 3 of an <8 x i64>.
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti64x2 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
  ret <8 x i64> %r2
}

; insert_v4i64: insert a loaded value at element 1 and the scalar argument at
; element 3 of a <4 x i64>.
define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v4i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
  ret <4 x i64> %r2
}

; insert_v2i64: insert a loaded value at element 1 and the scalar argument at
; element 0 of a <2 x i64>.
;
; The second insert previously used index 3, which is out of range for a
; two-element vector, so the IR result was not a defined value and the pinned
; `vpinsrq $3` carried an immediate outside the 0..1 lane range. Index 0 mirrors
; extract_v2i64, which reads elements 0 and 1.
;
; NOTE(review): the checks below were written by hand and deliberately use
; -DAG so they do not depend on the order in which the two inserts are
; emitted. Re-run utils/update_llc_test_checks.py to replace them with exact
; autogenerated assertions.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v2i64:
; KNL:       ## BB#0:
; KNL-DAG:     vpinsrq $1, (%rsi), %xmm0, %xmm0
; KNL-DAG:     vpinsrq $0, %rdi, %xmm0, %xmm0
; KNL:         retq
;
; SKX-LABEL: insert_v2i64:
; SKX:       ## BB#0:
; SKX-DAG:     vpinsrq $1, (%rsi), %xmm0, %xmm0
; SKX-DAG:     vpinsrq $0, %rdi, %xmm0, %xmm0
; SKX:         retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

; insert_v16i32: insert a loaded value at element 1 and the scalar argument at
; element 5 of a <16 x i32>.
define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
  ret <16 x i32> %r2
}

; insert_v8i32: insert a loaded value at element 1 and the scalar argument at
; element 5 of an <8 x i32>.
define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v8i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
  ret <8 x i32> %r2
}

; insert_v4i32: insert a loaded value at element 1 and the scalar argument at
; element 3 of a <4 x i32>.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v4i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

; insert_v32i16: insert a loaded value at element 1 and the scalar argument at
; element 9 of a <32 x i16>.
define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v32i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
; KNL-NEXT:    vpinsrw $1, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
  ret <32 x i16> %r2
}

; insert_v16i16: insert a loaded value at element 1 and the scalar argument at
; element 9 of a <16 x i16>.
define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
  ret <16 x i16> %r2
}

; insert_v8i16: insert a loaded value at element 1 and the scalar argument at
; element 5 of an <8 x i16>.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v8i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

; insert_v64i8: insert a loaded value at element 1 and the scalar argument at
; element 50 of a <64 x i8>.
define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v64i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpinsrb $2, %edi, %xmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v64i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; SKX-NEXT:    vpinsrb $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
  ret <64 x i8> %r2
}

; insert_v32i8: insert a loaded value at element 1 and the scalar argument at
; element 17 of a <32 x i8>.
define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
  ret <32 x i8> %r2
}

; insert_v16i8: insert a loaded value at element 3 and the scalar argument at
; element 10 of a <16 x i8>.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v16i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; SKX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; SKX-NEXT:    retq
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

; test_insert_128_v8i64: single insert at element 1 of an <8 x i64>.
define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
; KNL-LABEL: test_insert_128_v8i64:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8i64:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x i64> %x, i64 %y, i32 1
  ret <8 x i64> %r
}

; test_insert_128_v16i32: single insert at element 1 of a <16 x i32>.
define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
; KNL-LABEL: test_insert_128_v16i32:
; KNL:       ## BB#0:
; KNL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i32> %x, i32 %y, i32 1
  ret <16 x i32> %r
}

; test_insert_128_v8f64: single insert at element 1 of an <8 x double>.
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
; KNL-LABEL: test_insert_128_v8f64:
; KNL:       ## BB#0:
; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; SKX-NEXT:    vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <8 x double> %x, double %y, i32 1
  ret <8 x double> %r
}

; test_insert_128_v16f32: single insert at element 1 of a <16 x float>.
define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
; KNL-LABEL: test_insert_128_v16f32:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    retq
  %r = insertelement <16 x float> %x, float %y, i32 1
  ret <16 x float> %r
}

; test_insert_128_v16i16: single insert at element 10 of a <16 x i16>.
define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
; KNL-LABEL: test_insert_128_v16i16:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <16 x i16> %x, i16 %y, i32 10
  ret <16 x i16> %r
}

; test_insert_128_v32i8: single insert at element 20 of a <32 x i8>.
define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
; KNL-LABEL: test_insert_128_v32i8:
; KNL:       ## BB#0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v32i8:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT:    retq
  %r = insertelement <32 x i8> %x, i8 %y, i32 20
  ret <32 x i8> %r
}