Home | History | Annotate | Download | only in X86

Lines Matching full:ymm0

11 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
16 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
29 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
36 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
47 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
54 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
65 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
72 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
83 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
90 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
101 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
108 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
119 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
126 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
137 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
144 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
153 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
157 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
162 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
164 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
166 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
175 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
179 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
184 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
186 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
187 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
196 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
200 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
205 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
206 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
207 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
216 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
220 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
225 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
226 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
227 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
236 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
240 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
245 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
246 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
247 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
256 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
260 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
265 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
266 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
267 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
276 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
280 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
285 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
286 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
287 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
296 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
300 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
305 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
306 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
307 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
316 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
320 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
325 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
334 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
338 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
343 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
354 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
357 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
362 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
363 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
374 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
377 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
382 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
383 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
394 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
397 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
402 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
403 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
414 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
417 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
422 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
423 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
433 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
439 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
449 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
455 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
465 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
471 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
481 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
487 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
497 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
503 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
513 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
519 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
529 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
535 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
545 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
548 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
553 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
562 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
566 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
571 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
580 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
585 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
594 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
599 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
609 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
611 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
617 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
627 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
628 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
634 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
643 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
647 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
653 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
666 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
672 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
681 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
686 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
697 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
703 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
704 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
716 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
719 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
725 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
726 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
735 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
743 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
749 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
760 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
768 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
773 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
774 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
775 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
784 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
791 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
797 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
798 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
799 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
808 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
812 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
817 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
826 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
830 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
835 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
844 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
848 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
853 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
862 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
866 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
871 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
880 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
884 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
889 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
898 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
902 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
907 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
916 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
920 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
925 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
935 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
938 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
943 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
953 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
956 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
961 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
971 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
974 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
980 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
981 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
991 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
994 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1000 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
1001 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1011 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1013 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1018 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
1028 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1030 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1035 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
1045 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1047 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1052 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
1062 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1064 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1069 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
1079 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1081 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1086 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
1096 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1098 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1103 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
1113 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1115 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1120 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
1131 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1133 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1138 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
1148 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1151 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1156 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
1166 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1168 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1173 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
1183 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1185 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1190 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
1201 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1203 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1208 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
1219 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1222 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1227 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
1240 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1245 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1246 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1247 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1256 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1261 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1266 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1267 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1268 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1277 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1283 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1288 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1289 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1290 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1304 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1309 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1310 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
1311 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
1322 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1329 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1339 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1341 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1346 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
1356 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1358 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1363 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
1377 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1379 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1384 ; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
1394 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1396 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1401 ; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
1411 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1413 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1418 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
1428 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1429 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1434 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1446 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1451 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1463 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
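1468 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero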
1478 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1481 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1486 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1495 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1499 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1504 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
1514 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1517 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1522 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
1531 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1535 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1540 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1551 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1556 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1557 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
1568 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1573 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1574 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1583 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1588 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1593 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1598 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1607 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1611 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1616 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1620 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1629 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1635 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1640 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1641 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1645 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1654 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1658 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1663 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1667 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1676 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1683 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1688 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1695 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1704 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1709 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1714 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1719 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1728 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1733 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1738 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1743 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1752 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1758 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1763 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1769 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1778 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1783 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1788 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1793 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1802 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1806 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1811 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1815 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1824 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1830 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1835 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1841 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1850 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1855 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1860 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1865 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1874 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1879 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1884 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1889 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
1898 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1903 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1908 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1913 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1922 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1927 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1932 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1937 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1946 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1951 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1956 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1961 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1970 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1975 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1980 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1985 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1994 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1999 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2004 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2009 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2018 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2023 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2028 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2033 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2042 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2047 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2052 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2057 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2066 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2071 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2076 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2081 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2090 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2095 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2100 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2105 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2114 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2119 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2124 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2129 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2138 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2143 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2148 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2153 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2162 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2167 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2172 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2177 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2186 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2191 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2196 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2201 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2210 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2216 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2221 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2227 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2236 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2241 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2246 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2251 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2260 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2265 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2270 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2275 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2284 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2289 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2294 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2299 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2308 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2312 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2317 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
2326 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2330 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2335 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2339 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2348 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2352 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2357 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
2366 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2371 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2376 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2381 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2390 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2394 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2399 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2403 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2412 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2417 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2422 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2427 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2437 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2442 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2453 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
2454 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2463 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2470 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2475 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2478 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2481 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
2491 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2496 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2507 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
2508 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2518 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2524 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2529 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2530 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2535 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2546 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2555 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2567 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
2568 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
2569 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2580 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2587 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2598 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
2599 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
2600 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2614 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2618 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2629 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
2630 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2639 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2645 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2650 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2655 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2656 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
2657 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2666 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2672 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
2677 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2682 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2683 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
2684 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
2693 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2703 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2708 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2709 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2715 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2728 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2732 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2737 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
2738 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
2739 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
2749 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2755 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2760 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2761 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
2770 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
2775 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
2784 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2796 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
2806 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2810 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2815 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
2816 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
2825 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2831 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2837 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
2846 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2853 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2858 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2859 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2864 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2874 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2878 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2883 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
2884 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
2895 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2902 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2909 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
2910 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
2911 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
2923 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
2929 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2935 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
2936 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2946 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2952 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2957 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
2958 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2962 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2972 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2975 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2980 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
2989 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2993 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2998 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3002 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3012 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3014 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3019 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
3029 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3031 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3036 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
3046 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3052 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3057 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
3058 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3062 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3072 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3075 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3080 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
3089 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3093 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3098 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3102 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3114 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3119 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
3129 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3131 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3136 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
3145 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3152 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3157 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
3158 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3162 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3171 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3175 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3180 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
3189 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3196 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3201 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
3202 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3206 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3215 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3219 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3224 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
3233 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3241 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3246 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
3247 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
3257 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3263 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
3272 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3274 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3279 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3281 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
3290 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3296 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3317 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
3322 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3333 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3349 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3354 ; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
3369 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3376 ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
3391 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3396 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
3410 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
3415 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0