Lines Matching full:next
10 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
11 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
12 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX1-NEXT: retq
17 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
18 ; AVX2-NEXT: retq
26 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
27 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
28 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
29 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
30 ; AVX1-NEXT: retq
34 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
35 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
36 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
37 ; AVX2-NEXT: retq
45 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
46 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
47 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
48 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
49 ; AVX1-NEXT: retq
53 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
54 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
55 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
56 ; AVX2-NEXT: retq
64 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
65 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
66 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
67 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
68 ; AVX1-NEXT: retq
72 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
73 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
74 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
75 ; AVX2-NEXT: retq
83 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
84 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
85 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
86 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
87 ; AVX1-NEXT: retq
91 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
92 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
93 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
94 ; AVX2-NEXT: retq
102 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
103 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
104 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
105 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
106 ; AVX1-NEXT: retq
110 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
111 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
112 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
113 ; AVX2-NEXT: retq
121 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
122 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
123 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
124 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
125 ; AVX1-NEXT: retq
129 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
130 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
131 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
132 ; AVX2-NEXT: retq
140 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
141 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
142 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
143 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
144 ; AVX1-NEXT: retq
148 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
149 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
150 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
151 ; AVX2-NEXT: retq
159 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
160 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
161 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
162 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
163 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
164 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
165 ; AVX1-NEXT: retq
169 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
170 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
171 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
172 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
173 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
174 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
175 ; AVX2-NEXT: retq
183 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
184 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
185 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
186 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
187 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
188 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
189 ; AVX1-NEXT: retq
193 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
194 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
195 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
196 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
197 ; AVX2-NEXT: retq
205 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
206 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
207 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
208 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
209 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
210 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
211 ; AVX1-NEXT: retq
215 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
216 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
217 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
218 ; AVX2-NEXT: retq
226 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
227 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
228 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
229 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
230 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
231 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
232 ; AVX1-NEXT: retq
236 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
237 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
238 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
239 ; AVX2-NEXT: retq
247 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
248 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
249 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
250 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
251 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
252 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
253 ; AVX1-NEXT: retq
257 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
258 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
259 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
260 ; AVX2-NEXT: retq
268 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
269 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
270 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
271 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
272 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
273 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
274 ; AVX1-NEXT: retq
278 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
279 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
280 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
281 ; AVX2-NEXT: retq
289 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
290 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
291 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
292 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
293 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
294 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
295 ; AVX1-NEXT: retq
299 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
300 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
301 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
302 ; AVX2-NEXT: retq
310 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
311 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
312 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
313 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
314 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
315 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
316 ; AVX1-NEXT: retq
320 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
321 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
322 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
323 ; AVX2-NEXT: retq
331 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
332 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
333 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
334 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
335 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
336 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
337 ; AVX1-NEXT: retq
341 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
342 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
343 ; AVX2-NEXT: retq
351 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
352 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
353 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
354 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
355 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
356 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
357 ; AVX1-NEXT: retq
361 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
362 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,3,3,6,6,7,7]
363 ; AVX2-NEXT: retq
371 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
372 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
373 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
374 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
375 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
376 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
377 ; AVX1-NEXT: retq
381 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
382 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
383 ; AVX2-NEXT: retq
391 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
392 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
393 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
394 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
395 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
396 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
397 ; AVX1-NEXT: retq
401 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
402 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
403 ; AVX2-NEXT: retq
411 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
412 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
413 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
414 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
415 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
416 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
417 ; AVX1-NEXT: retq
421 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
422 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
423 ; AVX2-NEXT: retq
431 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
432 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
433 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
434 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
435 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
436 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
437 ; AVX1-NEXT: retq
441 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
442 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
443 ; AVX2-NEXT: retq
451 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
452 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
453 ; AVX1-NEXT: retq
457 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
458 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
459 ; AVX2-NEXT: retq
467 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
468 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
469 ; AVX1-NEXT: retq
473 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
474 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
475 ; AVX2-NEXT: retq
483 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
484 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
485 ; AVX1-NEXT: retq
489 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
490 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
491 ; AVX2-NEXT: retq
499 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
500 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
501 ; AVX1-NEXT: retq
505 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
506 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
507 ; AVX2-NEXT: retq
515 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
516 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
517 ; AVX1-NEXT: retq
521 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
522 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
523 ; AVX2-NEXT: retq
531 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
532 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
533 ; AVX1-NEXT: retq
537 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
538 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
539 ; AVX2-NEXT: retq
547 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
548 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
549 ; AVX1-NEXT: retq
553 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
554 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
555 ; AVX2-NEXT: retq
563 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
564 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
565 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
566 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
567 ; AVX1-NEXT: retq
571 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
572 ; AVX2-NEXT: retq
580 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
581 ; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
582 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
583 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
584 ; AVX1-NEXT: retq
588 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
589 ; AVX2-NEXT: retq
597 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
598 ; AVX1-NEXT: retq
602 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
603 ; AVX2-NEXT: retq
611 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
612 ; AVX1-NEXT: retq
616 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
617 ; AVX2-NEXT: retq
625 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,0]
626 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
627 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
628 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
629 ; AVX1-NEXT: retq
633 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
634 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
635 ; AVX2-NEXT: retq
643 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
644 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
645 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
646 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
647 ; AVX1-NEXT: retq
651 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
652 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
653 ; AVX2-NEXT: retq
661 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,0,65535,0,65535,0,65535,0,65535]
662 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
663 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
664 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
665 ; AVX1-NEXT: retq
669 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
670 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
671 ; AVX2-NEXT: retq
679 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [0,65535,0,65535,0,65535,0,65535,65535,0,65535,0,65535,0,65535,0]
680 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
681 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
682 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
683 ; AVX1-NEXT: retq
687 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
688 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
689 ; AVX2-NEXT: retq
697 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
698 ; AVX1-NEXT: retq
702 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
703 ; AVX2-NEXT: retq
711 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
712 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
713 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
714 ; AVX1-NEXT: retq
718 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
719 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
720 ; AVX2-NEXT: retq
728 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
729 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
730 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
731 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
732 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
733 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
734 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
735 ; AVX1-NEXT: retq
739 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
740 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
741 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,1,1,4,4,5,5]
742 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
743 ; AVX2-NEXT: retq
751 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
752 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
753 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
754 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
755 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
756 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
757 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
758 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
759 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
760 ; AVX1-NEXT: retq
764 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
765 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
766 ; AVX2-NEXT: retq
774 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
775 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
776 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
777 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
778 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
779 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
780 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
781 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
782 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
783 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
784 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
785 ; AVX1-NEXT: retq
789 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
790 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
791 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
792 ; AVX2-NEXT: retq
800 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
801 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
802 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
803 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
804 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
805 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
806 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
807 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
808 ; AVX1-NEXT: retq
812 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
813 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
814 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
815 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
816 ; AVX2-NEXT: retq
824 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
825 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
826 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
827 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
828 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
829 ; AVX1-NEXT: retq
833 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
834 ; AVX2-NEXT: retq
842 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
843 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
844 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
845 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
846 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
847 ; AVX1-NEXT: retq
851 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
852 ; AVX2-NEXT: retq
860 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
861 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
862 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
863 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
864 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
865 ; AVX1-NEXT: retq
869 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
870 ; AVX2-NEXT: retq
878 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
879 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
880 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
881 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
882 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
883 ; AVX1-NEXT: retq
887 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
888 ; AVX2-NEXT: retq
896 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
897 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
898 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
899 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
900 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
901 ; AVX1-NEXT: retq
905 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
906 ; AVX2-NEXT: retq
914 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
915 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
916 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
917 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
918 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
919 ; AVX1-NEXT: retq
923 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
924 ; AVX2-NEXT: retq
932 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
933 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
934 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
935 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
936 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
937 ; AVX1-NEXT: retq
941 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
942 ; AVX2-NEXT: retq
950 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
951 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
952 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
953 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
954 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
955 ; AVX1-NEXT: retq
959 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
960 ; AVX2-NEXT: retq
968 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
969 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
970 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
971 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
972 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
973 ; AVX1-NEXT: retq
977 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
978 ; AVX2-NEXT: retq
986 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
987 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
988 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
989 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
990 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
991 ; AVX1-NEXT: retq
995 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31]
996 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
997 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
998 ; AVX2-NEXT: retq
1006 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1007 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1008 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1009 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1010 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1011 ; AVX1-NEXT: retq
1015 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
1016 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
1017 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1018 ; AVX2-NEXT: retq
1026 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
1027 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1028 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1029 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1030 ; AVX1-NEXT: retq
1034 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
1035 ; AVX2-NEXT: retq
1043 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
1044 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1045 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
1046 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1047 ; AVX1-NEXT: retq
1051 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
1052 ; AVX2-NEXT: retq
1060 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
1061 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1062 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
1063 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1064 ; AVX1-NEXT: retq
1068 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
1069 ; AVX2-NEXT: retq
1077 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1078 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1079 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
1080 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1081 ; AVX1-NEXT: retq
1085 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
1086 ; AVX2-NEXT: retq
1094 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1095 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1096 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
1097 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1098 ; AVX1-NEXT: retq
1102 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
1103 ; AVX2-NEXT: retq
1111 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1113 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
1114 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1115 ; AVX1-NEXT: retq
1119 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
1120 ; AVX2-NEXT: retq
1128 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1129 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1130 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
1131 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1132 ; AVX1-NEXT: retq
1136 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
1137 ; AVX2-NEXT: retq
1145 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
1146 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
1147 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1148 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
1149 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1150 ; AVX1-NEXT: retq
1154 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
1155 ; AVX2-NEXT: retq
1163 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1164 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1165 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1166 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1167 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1168 ; AVX1-NEXT: retq
1172 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
1173 ; AVX2-NEXT: retq
1181 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
1182 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1183 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1184 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
1185 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1186 ; AVX1-NEXT: retq
1190 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
1191 ; AVX2-NEXT: retq
1199 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1200 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1201 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
1202 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1203 ; AVX1-NEXT: retq
1207 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
1208 ; AVX2-NEXT: retq
1216 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
1217 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
1218 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1219 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
1220 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1221 ; AVX1-NEXT: retq
1225 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
1226 ; AVX2-NEXT: retq
1234 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3]
1235 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1236 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1237 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
1238 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1239 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1240 ; AVX1-NEXT: retq
1244 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
1245 ; AVX2-NEXT: retq
1253 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1254 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1255 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1256 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1257 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1258 ; AVX1-NEXT: retq
1262 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1263 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1264 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1265 ; AVX2-NEXT: retq
1273 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1274 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1275 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1276 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1277 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1278 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1279 ; AVX1-NEXT: retq
1283 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1284 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1285 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1286 ; AVX2-NEXT: retq
1294 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1295 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1296 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1297 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1298 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1299 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1300 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1301 ; AVX1-NEXT: retq
1305 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1306 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1307 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1308 ; AVX2-NEXT: retq
1316 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1317 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1318 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1319 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1320 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1321 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1322 ; AVX1-NEXT: retq
1326 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1327 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1328 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1329 ; AVX2-NEXT: retq
1337 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1338 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1339 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1340 ; AVX1-NEXT: retq
1344 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1345 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1346 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1347 ; AVX2-NEXT: retq
1355 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1356 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1357 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1358 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1359 ; AVX1-NEXT: retq
1363 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
1364 ; AVX2-NEXT: retq
1372 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1373 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1374 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1375 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1376 ; AVX1-NEXT: retq
1380 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
1381 ; AVX2-NEXT: retq
1393 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
1394 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1395 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
1396 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1397 ; AVX1-NEXT: retq
1401 ; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
1402 ; AVX2-NEXT: retq
1410 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
1411 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1412 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
1413 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1414 ; AVX1-NEXT: retq
1418 ; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
1419 ; AVX2-NEXT: retq
1427 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
1428 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1429 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
1430 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1431 ; AVX1-NEXT: retq
1435 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
1436 ; AVX2-NEXT: retq
1444 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
1445 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1446 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1447 ; AVX1-NEXT: retq
1451 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1452 ; AVX2-NEXT: retq
1460 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1461 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1462 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1463 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1464 ; AVX1-NEXT: retq
1468 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1469 ; AVX2-NEXT: retq
1477 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1478 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1479 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1480 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1481 ; AVX1-NEXT: retq
1485 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1486 ; AVX2-NEXT: retq
1494 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1495 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1496 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1497 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1498 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1499 ; AVX1-NEXT: retq
1503 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1504 ; AVX2-NEXT: retq
1512 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1513 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1514 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
1515 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
1516 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1517 ; AVX1-NEXT: retq
1521 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
1522 ; AVX2-NEXT: retq
1530 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1531 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1532 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
1533 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
1534 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1535 ; AVX1-NEXT: retq
1539 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
1540 ; AVX2-NEXT: retq
1548 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1549 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1550 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1551 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1552 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1553 ; AVX1-NEXT: retq
1557 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1558 ; AVX2-NEXT: retq
1566 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
1567 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
1568 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1569 ; AVX1-NEXT: retq
1573 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1574 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
1575 ; AVX2-NEXT: retq
1583 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1584 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1585 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1586 ; AVX1-NEXT: retq
1590 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1591 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1592 ; AVX2-NEXT: retq
1600 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1601 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1602 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
1603 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
1604 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1605 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1606 ; AVX1-NEXT: retq
1610 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1611 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1612 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
1613 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
1614 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1615 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1616 ; AVX2-NEXT: retq
1624 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1625 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1626 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1627 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
1628 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1629 ; AVX1-NEXT: retq
1633 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1634 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1635 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1636 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
1637 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1638 ; AVX2-NEXT: retq
1646 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1647 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1648 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
1649 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
1650 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
1651 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1652 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1653 ; AVX1-NEXT: retq
1657 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1658 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1659 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1660 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1661 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1662 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1663 ; AVX2-NEXT: retq
1671 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1672 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1673 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
1674 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1675 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1676 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1677 ; AVX1-NEXT: retq
1681 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1682 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1683 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
1684 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
1685 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1686 ; AVX2-NEXT: retq
1694 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1695 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1696 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1697 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
1698 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1699 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1700 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1701 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1702 ; AVX1-NEXT: retq
1706 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1707 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1708 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1709 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
1710 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1711 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1712 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1713 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1714 ; AVX2-NEXT: retq
1722 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1723 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1724 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1725 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1726 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1727 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1728 ; AVX1-NEXT: retq
1732 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1733 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1734 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1735 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1736 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1737 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1738 ; AVX2-NEXT: retq
1746 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1747 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1748 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1749 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1750 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1751 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1752 ; AVX1-NEXT: retq
1756 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1757 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1758 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1759 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
1760 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
1761 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1762 ; AVX2-NEXT: retq
1770 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1771 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1772 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1773 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1774 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
1775 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1776 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1777 ; AVX1-NEXT: retq
1781 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1782 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1783 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1784 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1785 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
1786 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1787 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1788 ; AVX2-NEXT: retq
1796 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1797 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1798 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
1799 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1800 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1801 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1802 ; AVX1-NEXT: retq
1806 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1807 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1808 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
1809 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1810 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1811 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1812 ; AVX2-NEXT: retq
1820 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1821 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1822 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1823 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
1824 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1825 ; AVX1-NEXT: retq
1829 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1830 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1831 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1832 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
1833 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1834 ; AVX2-NEXT: retq
1842 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1843 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1844 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
1845 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1846 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
1847 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1848 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1849 ; AVX1-NEXT: retq
1853 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1854 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1855 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
1856 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
1857 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
1858 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1859 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1860 ; AVX2-NEXT: retq
1868 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1869 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1870 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1871 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
1872 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1873 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1874 ; AVX1-NEXT: retq
1878 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1879 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
1880 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1881 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
1882 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1883 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1884 ; AVX2-NEXT: retq
1892 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1893 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
1894 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1895 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1896 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1897 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1898 ; AVX1-NEXT: retq
1902 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1903 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
1904 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1905 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1906 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1907 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
1908 ; AVX2-NEXT: retq
1916 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1917 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1918 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
1919 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1920 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
1921 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1922 ; AVX1-NEXT: retq
1926 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1927 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1928 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
1929 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1930 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
1931 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1932 ; AVX2-NEXT: retq
1940 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1941 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1942 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
1943 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1944 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
1945 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1946 ; AVX1-NEXT: retq
1950 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1951 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1952 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
1953 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1954 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
1955 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1956 ; AVX2-NEXT: retq
1964 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1965 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1966 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
1967 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1968 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
1969 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1970 ; AVX1-NEXT: retq
1974 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1975 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1976 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
1977 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1978 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
1979 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1980 ; AVX2-NEXT: retq
1988 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1989 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1990 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
1991 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1992 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
1993 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1994 ; AVX1-NEXT: retq
1998 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1999 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
2000 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
2001 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2002 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
2003 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2004 ; AVX2-NEXT: retq
2012 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2013 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2014 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
2015 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2016 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
2017 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2018 ; AVX1-NEXT: retq
2022 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2023 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2024 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
2025 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2026 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
2027 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2028 ; AVX2-NEXT: retq
2036 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2037 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
2038 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2039 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2040 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2041 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2042 ; AVX1-NEXT: retq
2046 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2047 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
2048 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2049 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2050 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2051 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2052 ; AVX2-NEXT: retq
2060 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2061 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
2062 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2063 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2064 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2065 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2066 ; AVX1-NEXT: retq
2070 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2071 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
2072 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2073 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2074 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2075 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2076 ; AVX2-NEXT: retq
2084 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2085 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
2086 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2087 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2088 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2089 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2090 ; AVX1-NEXT: retq
2094 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2095 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
2096 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2097 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2098 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2099 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2100 ; AVX2-NEXT: retq
2108 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2109 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
2110 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2111 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
2112 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2113 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2114 ; AVX1-NEXT: retq
2118 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2119 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
2120 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2121 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
2122 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2123 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2124 ; AVX2-NEXT: retq
2132 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2133 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2134 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
2135 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2136 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
2137 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2138 ; AVX1-NEXT: retq
2142 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2143 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2144 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
2145 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2146 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
2147 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2148 ; AVX2-NEXT: retq
2156 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2157 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2158 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
2159 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2160 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
2161 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2162 ; AVX1-NEXT: retq
2166 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2167 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2168 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
2169 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2170 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
2171 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2172 ; AVX2-NEXT: retq
2180 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2181 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2182 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2183 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2184 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2185 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2186 ; AVX1-NEXT: retq
2190 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2191 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2192 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2193 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2194 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2195 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2196 ; AVX2-NEXT: retq
2204 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2205 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
2206 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
2207 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2208 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
2209 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2210 ; AVX1-NEXT: retq
2214 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2215 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
2216 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
2217 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2218 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
2219 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2220 ; AVX2-NEXT: retq
2228 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2229 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2230 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
2231 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2232 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2233 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
2234 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2235 ; AVX1-NEXT: retq
2239 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2240 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2241 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
2242 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2243 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2244 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
2245 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2246 ; AVX2-NEXT: retq
2254 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2255 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2256 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
2257 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2258 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
2259 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2260 ; AVX1-NEXT: retq
2264 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2265 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2266 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
2267 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2268 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
2269 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2270 ; AVX2-NEXT: retq
2278 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2279 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2280 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2281 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2282 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2283 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2284 ; AVX1-NEXT: retq
2288 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2289 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2290 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2291 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2292 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2293 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2294 ; AVX2-NEXT: retq
2302 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2303 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2304 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2305 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2306 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2307 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2308 ; AVX1-NEXT: retq
2312 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2313 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2314 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
2315 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2316 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2317 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2318 ; AVX2-NEXT: retq
2326 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2327 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
2328 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2329 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2330 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2331 ; AVX1-NEXT: retq
2335 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
2336 ; AVX2-NEXT: retq
2344 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2345 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
2346 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2347 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
2348 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2349 ; AVX1-NEXT: retq
2353 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2354 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
2355 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2356 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
2357 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2358 ; AVX2-NEXT: retq
2366 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2367 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
2368 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2369 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2370 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2371 ; AVX1-NEXT: retq
2375 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
2376 ; AVX2-NEXT: retq
2384 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
2385 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2386 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
2387 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2388 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2389 ; AVX1-NEXT: retq
2393 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
2394 ; AVX2-NEXT: retq
2402 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2403 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
2404 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2405 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2406 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2407 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2408 ; AVX1-NEXT: retq
2412 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2413 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
2414 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2415 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2416 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2417 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2418 ; AVX2-NEXT: retq
2426 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2427 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
2428 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2429 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
2430 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2431 ; AVX1-NEXT: retq
2435 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2436 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
2437 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2438 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
2439 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2440 ; AVX2-NEXT: retq
2448 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2449 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
2450 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2451 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2452 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2453 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2454 ; AVX1-NEXT: retq
2458 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2459 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
2460 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2461 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
2462 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2463 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2464 ; AVX2-NEXT: retq
2472 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2473 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2474 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2475 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2476 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2477 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2478 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2479 ; AVX1-NEXT: retq
2483 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2484 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2485 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2486 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
2487 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
2488 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
2489 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
2490 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2491 ; AVX2-NEXT: retq
2499 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2500 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2501 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
2502 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
2503 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2504 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2505 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2506 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2507 ; AVX1-NEXT: retq
2511 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2512 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
2513 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2
2514 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2515 ; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
2516 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
2517 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
2518 ; AVX2-NEXT: retq
2526 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2527 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2528 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2529 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2530 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2531 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2532 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2533 ; AVX1-NEXT: retq
2537 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2538 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2539 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2540 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
2541 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
2542 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
2543 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
2544 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2545 ; AVX2-NEXT: retq
2553 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2554 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2555 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2556 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2557 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2558 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2559 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2560 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2561 ; AVX1-NEXT: retq
2565 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2566 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2567 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2568 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
2569 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2570 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2571 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2572 ; AVX2-NEXT: retq
2580 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2581 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
2582 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2583 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
2584 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2585 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
2586 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
2587 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
2588 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
2589 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
2590 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2591 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2592 ; AVX1-NEXT: retq
2596 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2597 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
2598 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
2599 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
2600 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
2601 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2602 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2603 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
2604 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
2605 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2606 ; AVX2-NEXT: retq
2614 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2615 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
2616 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2617 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
2618 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2619 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2620 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
2621 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
2622 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2623 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2624 ; AVX1-NEXT: retq
2628 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2629 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3]
2630 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4
2631 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2632 ; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2633 ; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
2634 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
2635 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
2636 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2637 ; AVX2-NEXT: retq
2645 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2646 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2647 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
2648 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
2649 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2650 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2651 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2652 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
2653 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
2654 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2655 ; AVX1-NEXT: retq
2659 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2660 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2661 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
2662 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
2663 ; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
2664 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2665 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
2666 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2667 ; AVX2-NEXT: retq
2675 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2676 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2677 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2678 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2679 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2680 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2681 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2682 ; AVX1-NEXT: retq
2686 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2687 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2688 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2689 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
2690 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
2691 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2692 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
2693 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2694 ; AVX2-NEXT: retq
2702 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2703 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2704 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2705 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2706 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2707 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2708 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2709 ; AVX1-NEXT: retq
2713 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2714 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2715 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2716 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
2717 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
2718 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2719 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
2720 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2721 ; AVX2-NEXT: retq
2729 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2730 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
2731 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2732 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
2733 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
2734 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
2735 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2736 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
2737 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
2738 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2739 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2740 ; AVX1-NEXT: retq
2744 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2745 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2746 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
2747 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
2748 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2749 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
2750 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
2751 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2752 ; AVX2-NEXT: retq
2760 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
2761 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
2762 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
2763 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2764 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2765 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
2766 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
2767 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
2768 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2769 ; AVX1-NEXT: retq
2773 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
2774 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
2775 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
2776 ; AVX2-NEXT: retq
2784 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2785 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2786 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2787 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
2788 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2789 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
2790 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2791 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2792 ; AVX1-NEXT: retq
2796 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2797 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
2798 ; AVX2-NEXT: retq
2806 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
2807 ; AVX1-NEXT: retq
2811 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
2812 ; AVX2-NEXT: retq
2820 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2821 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2822 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
2823 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
2824 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
2825 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2826 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2827 ; AVX1-NEXT: retq
2831 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
2832 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
2833 ; AVX2-NEXT: retq
2841 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2842 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2843 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2844 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
2845 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
2846 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2847 ; AVX1-NEXT: retq
2851 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
2852 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
2853 ; AVX2-NEXT: retq
2861 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2862 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2863 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2864 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
2865 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2866 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
2867 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2868 ; AVX1-NEXT: retq
2872 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
2873 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
2874 ; AVX2-NEXT: retq
2882 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2883 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2884 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
2885 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
2886 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2887 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2888 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
2889 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2890 ; AVX1-NEXT: retq
2894 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2895 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2896 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2897 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
2898 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2899 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2900 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2901 ; AVX2-NEXT: retq
2909 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2910 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2911 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
2912 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
2913 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
2914 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2915 ; AVX1-NEXT: retq
2919 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
2920 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
2921 ; AVX2-NEXT: retq
2929 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2930 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
2931 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2932 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
2933 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
2934 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
2935 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
2936 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
2937 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2938 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2939 ; AVX1-NEXT: retq
2943 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
2944 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
2945 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
2946 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
2947 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
2948 ; AVX2-NEXT: retq
2956 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2957 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
2958 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2959 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2960 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
2961 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
2962 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2963 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
2964 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
2965 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2966 ; AVX1-NEXT: retq
2970 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
2971 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2972 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2973 ; AVX2-NEXT: retq
2981 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2982 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2983 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
2984 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
2985 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
2986 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2987 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
2988 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2989 ; AVX1-NEXT: retq
2993 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
2994 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2995 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
2996 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2997 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2998 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2999 ; AVX2-NEXT: retq
3007 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3008 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3009 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
3010 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
3011 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3012 ; AVX1-NEXT: retq
3016 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
3017 ; AVX2-NEXT: retq
3025 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3026 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
3027 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3028 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3029 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3030 ; AVX1-NEXT: retq
3034 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3035 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
3036 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3037 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3038 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3039 ; AVX2-NEXT: retq
3047 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3048 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3049 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3050 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3051 ; AVX1-NEXT: retq
3055 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
3056 ; AVX2-NEXT: retq
3064 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
3065 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3066 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
3067 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3068 ; AVX1-NEXT: retq
3072 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
3073 ; AVX2-NEXT: retq
3081 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3082 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3083 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3084 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
3085 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
3086 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3087 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
3088 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3089 ; AVX1-NEXT: retq
3093 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
3094 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3095 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3096 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3097 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3098 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3099 ; AVX2-NEXT: retq
3107 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3108 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3109 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3110 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
3111 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3112 ; AVX1-NEXT: retq
3116 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
3117 ; AVX2-NEXT: retq
3125 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3126 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3127 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3128 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3129 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3130 ; AVX1-NEXT: retq
3134 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3135 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3136 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3137 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3138 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3139 ; AVX2-NEXT: retq
3147 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3148 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3149 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3150 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3151 ; AVX1-NEXT: retq
3155 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
3156 ; AVX2-NEXT: retq
3164 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3165 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3166 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3167 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3168 ; AVX1-NEXT: retq
3172 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
3173 ; AVX2-NEXT: retq
3181 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3182 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3183 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3184 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3185 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
3186 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
3187 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
3188 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3189 ; AVX1-NEXT: retq
3193 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
3194 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3195 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3196 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3197 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3198 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3199 ; AVX2-NEXT: retq
3207 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3208 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3209 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3210 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
3211 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3212 ; AVX1-NEXT: retq
3216 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
3217 ; AVX2-NEXT: retq
3225 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3226 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3227 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
3228 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
3229 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
3230 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3231 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7]
3232 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3233 ; AVX1-NEXT: retq
3237 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
3238 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3239 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
3240 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3241 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3242 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3243 ; AVX2-NEXT: retq
3251 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3252 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3253 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
3254 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
3255 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3256 ; AVX1-NEXT: retq
3260 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
3261 ; AVX2-NEXT: retq
3269 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3270 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3271 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3272 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
3273 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
3274 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3275 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
3276 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3277 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3278 ; AVX1-NEXT: retq
3282 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
3283 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
3284 ; AVX2-NEXT: retq
3292 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3293 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3294 ; AVX1-NEXT: retq
3298 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3299 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
3300 ; AVX2-NEXT: retq
3308 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3309 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3310 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3311 ; AVX1-NEXT: retq
3315 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3316 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3317 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
3318 ; AVX2-NEXT: retq
3326 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3327 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3328 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3329 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3330 ; AVX1-NEXT: retq
3334 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3335 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
3336 ; AVX2-NEXT: retq
3344 ; ALL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3345 ; ALL-NEXT: retq
3353 ; ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
3354 ; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3355 ; ALL-NEXT: retq
3363 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3364 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3365 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3366 ; AVX1-NEXT: retq
3370 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3371 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3372 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3373 ; AVX2-NEXT: retq
3381 ; ALL-NEXT: movzwl (%rdi), %eax
3382 ; ALL-NEXT: vmovd %eax, %xmm0
3383 ; ALL-NEXT: retq
3392 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
3393 ; AVX1-NEXT: retq
3397 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3398 ; AVX2-NEXT: retq
3408 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3409 ; ALL-NEXT: retq
3422 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
3423 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
3424 ; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
3425 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5,6,7]
3426 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
3427 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm5 = xmm4[0,1,2,3,5,5,6,7]
3428 ; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm0[2,3,2,3,4,5,6,7,8,9,8,9,0,1,2,3]
3429 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6,7]
3430 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3],xmm5[4,5,6],xmm2[7]
3431 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
3432 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
3433 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
3434 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,4,5,4,5,10,11,4,5,14,15,12,13,0,1]
3435 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7]
3436 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3437 ; AVX1-NEXT: retq
3441 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
3442 ; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23]
3443 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17]
3444 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0>
3445 ; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
3446 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
3447 ; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
3448 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
3449 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,6,7,8,9,10,11,13,13,14,15]
3450 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1,2],ymm2[3],ymm0[4],ymm2[5,6,7,8],ymm0[9,10],ymm2[11],ymm0[12],ymm2[13,14,15]
3451 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
3452 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
3453 ; AVX2-NEXT: retq
3461 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3462 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3463 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3464 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3465 ; AVX1-NEXT: retq
3469 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
3470 ; AVX2-NEXT: retq
3481 ; AVX1-NEXT: movswl (%rdi), %eax
3482 ; AVX1-NEXT: vmovd %eax, %xmm0
3483 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
3484 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3485 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3486 ; AVX1-NEXT: retq
3490 ; AVX2-NEXT: movswl (%rdi), %eax
3491 ; AVX2-NEXT: vmovd %eax, %xmm0
3492 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
3493 ; AVX2-NEXT: retq
3505 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3506 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
3507 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
3508 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3509 ; AVX1-NEXT: retq
3513 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
3514 ; AVX2-NEXT: retq
3525 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3526 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
3527 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3528 ; AVX1-NEXT: retq
3532 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
3533 ; AVX2-NEXT: retq