Lines Matching full:next
14 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
15 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
16 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
17 ; SSE2-NEXT: retq
21 ; SSSE3-NEXT: pxor %xmm1, %xmm1
22 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
23 ; SSSE3-NEXT: retq
27 ; SSE41-NEXT: pxor %xmm1, %xmm1
28 ; SSE41-NEXT: pshufb %xmm1, %xmm0
29 ; SSE41-NEXT: retq
33 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
35 ; AVX1-NEXT: retq
39 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
40 ; AVX2-NEXT: retq
48 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
49 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
50 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
51 ; SSE2-NEXT: retq
55 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
56 ; SSSE3-NEXT: retq
60 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
61 ; SSE41-NEXT: retq
65 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
66 ; AVX-NEXT: retq
74 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
75 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
76 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
77 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
78 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
79 ; SSE2-NEXT: retq
83 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
84 ; SSSE3-NEXT: retq
88 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
89 ; SSE41-NEXT: retq
93 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
94 ; AVX-NEXT: retq
102 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
103 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
104 ; SSE-NEXT: retq
108 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
109 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
110 ; AVX-NEXT: retq
118 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
119 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
120 ; SSE-NEXT: retq
124 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
125 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
126 ; AVX-NEXT: retq
134 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
135 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
136 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
137 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
138 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
139 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
140 ; SSE2-NEXT: retq
144 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
145 ; SSSE3-NEXT: retq
149 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
150 ; SSE41-NEXT: retq
154 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
155 ; AVX-NEXT: retq
163 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
164 ; SSE-NEXT: retq
168 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
169 ; AVX-NEXT: retq
177 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
178 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
179 ; SSE-NEXT: retq
183 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
184 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
185 ; AVX1-NEXT: retq
189 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
190 ; AVX2-NEXT: retq
198 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
199 ; SSE-NEXT: retq
203 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
204 ; AVX-NEXT: retq
212 ; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
213 ; SSE-NEXT: retq
217 ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
218 ; AVX-NEXT: retq
226 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
227 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
228 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
229 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
230 ; SSE2-NEXT: pand %xmm2, %xmm1
231 ; SSE2-NEXT: pandn %xmm0, %xmm2
232 ; SSE2-NEXT: por %xmm1, %xmm2
233 ; SSE2-NEXT: movdqa %xmm2, %xmm0
234 ; SSE2-NEXT: retq
238 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
239 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
240 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
241 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
242 ; SSSE3-NEXT: retq
246 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
247 ; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
248 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
249 ; SSE41-NEXT: movdqa %xmm1, %xmm0
250 ; SSE41-NEXT: retq
254 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
255 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
256 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
257 ; AVX1-NEXT: retq
261 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
262 ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
263 ; AVX2-NEXT: retq
271 ; SSE2-NEXT: pxor %xmm1, %xmm1
272 ; SSE2-NEXT: movdqa %xmm0, %xmm2
273 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
274 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
275 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
276 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
277 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
278 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
279 ; SSE2-NEXT: packuswb %xmm2, %xmm0
280 ; SSE2-NEXT: retq
284 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
285 ; SSSE3-NEXT: retq
289 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
290 ; SSE41-NEXT: retq
294 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
295 ; AVX-NEXT: retq
303 ; SSE2-NEXT: pxor %xmm2, %xmm2
304 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
305 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
306 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
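307 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]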
308 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
309 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
310 ; SSE2-NEXT: packuswb %xmm1, %xmm0
311 ; SSE2-NEXT: retq
315 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
316 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
317 ; SSSE3-NEXT: retq
321 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
322 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
323 ; SSE41-NEXT: retq
327 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
328 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
329 ; AVX-NEXT: retq
337 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
338 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
339 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
340 ; SSE2-NEXT: pxor %xmm1, %xmm1
341 ; SSE2-NEXT: movdqa %xmm0, %xmm2
342 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
343 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
344 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
345 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
346 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
347 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
348 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
349 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
350 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
351 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
352 ; SSE2-NEXT: packuswb %xmm3, %xmm0
353 ; SSE2-NEXT: retq
357 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
358 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
359 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
360 ; SSSE3-NEXT: retq
364 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
365 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
366 ; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
367 ; SSE41-NEXT: retq
371 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
372 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
373 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
374 ; AVX-NEXT: retq
382 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
383 ; SSE2-NEXT: andps %xmm2, %xmm0
384 ; SSE2-NEXT: andnps %xmm1, %xmm2
385 ; SSE2-NEXT: orps %xmm2, %xmm0
386 ; SSE2-NEXT: retq
390 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
391 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
392 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
393 ; SSSE3-NEXT: retq
397 ; SSE41-NEXT: movdqa %xmm0, %xmm2
398 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
399 ; SSE41-NEXT: pblendvb %xmm2, %xmm1
400 ; SSE41-NEXT: movdqa %xmm1, %xmm0
401 ; SSE41-NEXT: retq
405 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
406 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
407 ; AVX-NEXT: retq
415 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
416 ; SSE2-NEXT: andps %xmm2, %xmm0
417 ; SSE2-NEXT: andnps %xmm1, %xmm2
418 ; SSE2-NEXT: orps %xmm2, %xmm0
419 ; SSE2-NEXT: retq
423 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
424 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
425 ; SSSE3-NEXT: por %xmm1, %xmm0
426 ; SSSE3-NEXT: retq
430 ; SSE41-NEXT: movdqa %xmm0, %xmm2
431 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
432 ; SSE41-NEXT: pblendvb %xmm2, %xmm1
433 ; SSE41-NEXT: movdqa %xmm1, %xmm0
434 ; SSE41-NEXT: retq
438 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
439 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
440 ; AVX-NEXT: retq
448 ; SSE-NEXT: andps {{.*}}(%rip), %xmm0
449 ; SSE-NEXT: retq
453 ; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
454 ; AVX-NEXT: retq
462 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
463 ; SSE2-NEXT: andps %xmm2, %xmm0
464 ; SSE2-NEXT: andnps %xmm1, %xmm2
465 ; SSE2-NEXT: orps %xmm2, %xmm0
466 ; SSE2-NEXT: retq
470 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
471 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
472 ; SSSE3-NEXT: por %xmm1, %xmm0
473 ; SSSE3-NEXT: retq
477 ; SSE41-NEXT: movdqa %xmm0, %xmm2
478 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
479 ; SSE41-NEXT: pblendvb %xmm2, %xmm1
480 ; SSE41-NEXT: movdqa %xmm1, %xmm0
481 ; SSE41-NEXT: retq
485 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
486 ; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
487 ; AVX-NEXT: retq
495 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
496 ; SSE2-NEXT: andps %xmm2, %xmm1
497 ; SSE2-NEXT: andnps %xmm0, %xmm2
498 ; SSE2-NEXT: orps %xmm1, %xmm2
499 ; SSE2-NEXT: movaps %xmm2, %xmm0
500 ; SSE2-NEXT: retq
504 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
505 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
506 ; SSSE3-NEXT: por %xmm1, %xmm0
507 ; SSSE3-NEXT: retq
511 ; SSE41-NEXT: movdqa %xmm0, %xmm2
512 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
513 ; SSE41-NEXT: pblendvb %xmm1, %xmm2
514 ; SSE41-NEXT: movdqa %xmm2, %xmm0
515 ; SSE41-NEXT: retq
519 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
520 ; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
521 ; AVX-NEXT: retq
529 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
530 ; SSE2-NEXT: packuswb %xmm0, %xmm0
531 ; SSE2-NEXT: packuswb %xmm0, %xmm0
532 ; SSE2-NEXT: retq
536 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
537 ; SSSE3-NEXT: retq
541 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
542 ; SSE41-NEXT: retq
546 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
547 ; AVX-NEXT: retq
578 ; ALL-NEXT: retq
600 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
601 ; SSE2-NEXT: packuswb %xmm0, %xmm0
602 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
603 ; SSE2-NEXT: retq
607 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
608 ; SSSE3-NEXT: retq
612 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
613 ; SSE41-NEXT: retq
617 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
618 ; AVX-NEXT: retq
626 ; SSE-NEXT: movzbl %dil, %eax
627 ; SSE-NEXT: movd %eax, %xmm0
628 ; SSE-NEXT: retq
632 ; AVX-NEXT: movzbl %dil, %eax
633 ; AVX-NEXT: vmovd %eax, %xmm0
634 ; AVX-NEXT: retq
643 ; SSE2-NEXT: shll $8, %edi
644 ; SSE2-NEXT: pxor %xmm0, %xmm0
645 ; SSE2-NEXT: pinsrw $2, %edi, %xmm0
646 ; SSE2-NEXT: retq
650 ; SSSE3-NEXT: shll $8, %edi
651 ; SSSE3-NEXT: pxor %xmm0, %xmm0
652 ; SSSE3-NEXT: pinsrw $2, %edi, %xmm0
653 ; SSSE3-NEXT: retq
657 ; SSE41-NEXT: pxor %xmm0, %xmm0
658 ; SSE41-NEXT: pinsrb $5, %edi, %xmm0
659 ; SSE41-NEXT: retq
663 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
664 ; AVX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
665 ; AVX-NEXT: retq
674 ; SSE2-NEXT: shll $8, %edi
675 ; SSE2-NEXT: pxor %xmm0, %xmm0
676 ; SSE2-NEXT: pinsrw $7, %edi, %xmm0
677 ; SSE2-NEXT: retq
681 ; SSSE3-NEXT: shll $8, %edi
682 ; SSSE3-NEXT: pxor %xmm0, %xmm0
683 ; SSSE3-NEXT: pinsrw $7, %edi, %xmm0
684 ; SSSE3-NEXT: retq
688 ; SSE41-NEXT: pxor %xmm0, %xmm0
689 ; SSE41-NEXT: pinsrb $15, %edi, %xmm0
690 ; SSE41-NEXT: retq
694 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
695 ; AVX-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0
696 ; AVX-NEXT: retq
705 ; SSE2-NEXT: movzbl %dil, %eax
706 ; SSE2-NEXT: pxor %xmm0, %xmm0
707 ; SSE2-NEXT: pinsrw $1, %eax, %xmm0
708 ; SSE2-NEXT: retq
712 ; SSSE3-NEXT: movzbl %dil, %eax
713 ; SSSE3-NEXT: pxor %xmm0, %xmm0
714 ; SSSE3-NEXT: pinsrw $1, %eax, %xmm0
715 ; SSSE3-NEXT: retq
719 ; SSE41-NEXT: pxor %xmm0, %xmm0
720 ; SSE41-NEXT: pinsrb $2, %edi, %xmm0
721 ; SSE41-NEXT: retq
725 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
726 ; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0
727 ; AVX-NEXT: retq
736 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
737 ; SSE-NEXT: retq
741 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
742 ; AVX-NEXT: retq
750 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
751 ; SSE-NEXT: retq
755 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
756 ; AVX-NEXT: retq
764 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
765 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
766 ; SSE2-NEXT: por %xmm1, %xmm0
767 ; SSE2-NEXT: retq
771 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
772 ; SSSE3-NEXT: retq
776 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
777 ; SSE41-NEXT: retq
781 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
782 ; AVX-NEXT: retq
790 ; SSE2-NEXT: movdqa %xmm0, %xmm1
791 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
792 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
793 ; SSE2-NEXT: por %xmm1, %xmm0
794 ; SSE2-NEXT: retq
798 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
799 ; SSSE3-NEXT: retq
803 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
804 ; SSE41-NEXT: retq
808 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
809 ; AVX-NEXT: retq
817 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
818 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
819 ; SSE2-NEXT: por %xmm1, %xmm0
820 ; SSE2-NEXT: retq
824 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
825 ; SSSE3-NEXT: retq
829 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
830 ; SSE41-NEXT: retq
834 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
835 ; AVX-NEXT: retq
843 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
844 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
845 ; SSE2-NEXT: por %xmm1, %xmm0
846 ; SSE2-NEXT: retq
850 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
851 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
852 ; SSSE3-NEXT: retq
856 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
857 ; SSE41-NEXT: movdqa %xmm1, %xmm0
858 ; SSE41-NEXT: retq
862 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
863 ; AVX-NEXT: retq
871 ; SSE2-NEXT: movdqa %xmm0, %xmm1
872 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
873 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
874 ; SSE2-NEXT: por %xmm1, %xmm0
875 ; SSE2-NEXT: retq
879 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
880 ; SSSE3-NEXT: retq
884 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
885 ; SSE41-NEXT: retq
889 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
890 ; AVX-NEXT: retq
898 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
899 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
900 ; SSE2-NEXT: por %xmm1, %xmm0
901 ; SSE2-NEXT: retq
905 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
906 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
907 ; SSSE3-NEXT: retq
911 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
912 ; SSE41-NEXT: movdqa %xmm1, %xmm0
913 ; SSE41-NEXT: retq
917 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
918 ; AVX-NEXT: retq
926 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
927 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
928 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
929 ; SSE2-NEXT: retq
933 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
934 ; SSSE3-NEXT: retq
938 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
939 ; SSE41-NEXT: retq
943 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
944 ; AVX-NEXT: retq
952 ; SSE2-NEXT: pxor %xmm1, %xmm1
953 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
954 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
955 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
956 ; SSE2-NEXT: retq
960 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
961 ; SSSE3-NEXT: retq
965 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
966 ; SSE41-NEXT: retq
970 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
971 ; AVX-NEXT: retq
979 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
980 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
981 ; SSE2-NEXT: retq
985 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
986 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
987 ; SSSE3-NEXT: retq
991 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
992 ; SSE41-NEXT: retq
996 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
997 ; AVX-NEXT: retq
1005 ; SSE2-NEXT: pxor %xmm1, %xmm1
1006 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1007 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1008 ; SSE2-NEXT: retq
1012 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1013 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1014 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1015 ; SSSE3-NEXT: retq
1019 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1020 ; SSE41-NEXT: retq
1024 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1025 ; AVX-NEXT: retq
1033 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1034 ; SSE2-NEXT: retq
1038 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1039 ; SSSE3-NEXT: retq
1043 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1044 ; SSE41-NEXT: retq
1048 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1049 ; AVX-NEXT: retq
1057 ; SSE2-NEXT: pxor %xmm1, %xmm1
1058 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1059 ; SSE2-NEXT: retq
1063 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1064 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1065 ; SSSE3-NEXT: retq
1069 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1070 ; SSE41-NEXT: retq
1074 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1075 ; AVX-NEXT: retq
1083 ; SSE2-NEXT: pxor %xmm2, %xmm2
1084 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1085 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
1086 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,3,0,1]
1087 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,2,2,4,5,6,7]
1088 ; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,7,7]
1089 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535]
1090 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1091 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,3]
1092 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,3,1,3,4,5,6,7]
1093 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,4]
1094 ; SSE2-NEXT: pand %xmm5, %xmm2
1095 ; SSE2-NEXT: pandn %xmm4, %xmm5
1096 ; SSE2-NEXT: por %xmm2, %xmm5
1097 ; SSE2-NEXT: psrlq $16, %xmm3
1098 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
1099 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,1,3]
1100 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1101 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4]
1102 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
1103 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1104 ; SSE2-NEXT: packuswb %xmm5, %xmm2
1105 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
1106 ; SSE2-NEXT: pand %xmm0, %xmm2
1107 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,3,1,1,4,5,6,7]
1108 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1109 ; SSE2-NEXT: pandn %xmm1, %xmm0
1110 ; SSE2-NEXT: por %xmm2, %xmm0
1111 ; SSE2-NEXT: retq
1115 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1116 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1117 ; SSSE3-NEXT: por %xmm1, %xmm0
1118 ; SSSE3-NEXT: retq
1122 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1123 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1124 ; SSE41-NEXT: por %xmm1, %xmm0
1125 ; SSE41-NEXT: retq
1129 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
1130 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
1131 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
1132 ; AVX-NEXT: retq
1154 ; SSE-NEXT: xorps %xmm0, %xmm0
1155 ; SSE-NEXT: movaps %xmm0, (%rdi)
1156 ; SSE-NEXT: movaps %xmm0, (%rsi)
1157 ; SSE-NEXT: retq
1161 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1162 ; AVX-NEXT: vmovaps %xmm0, (%rdi)
1163 ; AVX-NEXT: vmovaps %xmm0, (%rsi)
1164 ; AVX-NEXT: retq
1181 ; SSE-NEXT: psllw $8, %xmm0
1182 ; SSE-NEXT: retq
1186 ; AVX-NEXT: vpsllw $8, %xmm0, %xmm0
1187 ; AVX-NEXT: retq
1195 ; SSE-NEXT: pslld $24, %xmm0
1196 ; SSE-NEXT: retq
1200 ; AVX-NEXT: vpslld $24, %xmm0, %xmm0
1201 ; AVX-NEXT: retq
1209 ; SSE-NEXT: psllq $56, %xmm0
1210 ; SSE-NEXT: retq
1214 ; AVX-NEXT: vpsllq $56, %xmm0, %xmm0
1215 ; AVX-NEXT: retq
1223 ; SSE-NEXT: psllq $8, %xmm0
1224 ; SSE-NEXT: retq
1228 ; AVX-NEXT: vpsllq $8, %xmm0, %xmm0
1229 ; AVX-NEXT: retq
1237 ; SSE-NEXT: psrlw $8, %xmm0
1238 ; SSE-NEXT: retq
1242 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
1243 ; AVX-NEXT: retq
1251 ; SSE-NEXT: psrld $16, %xmm0
1252 ; SSE-NEXT: retq
1256 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
1257 ; AVX-NEXT: retq
1265 ; SSE-NEXT: psrlq $56, %xmm0
1266 ; SSE-NEXT: retq
1270 ; AVX-NEXT: vpsrlq $56, %xmm0, %xmm0
1271 ; AVX-NEXT: retq
1279 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1280 ; SSE2-NEXT: pand %xmm2, %xmm1
1281 ; SSE2-NEXT: pand %xmm2, %xmm0
1282 ; SSE2-NEXT: packuswb %xmm1, %xmm0
1283 ; SSE2-NEXT: retq
1287 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1288 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
1289 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
1290 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1291 ; SSSE3-NEXT: retq
1295 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1296 ; SSE41-NEXT: pshufb %xmm2, %xmm1
1297 ; SSE41-NEXT: pshufb %xmm2, %xmm0
1298 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1299 ; SSE41-NEXT: retq
1303 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1304 ; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1305 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1306 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1307 ; AVX-NEXT: retq
1316 ; SSE-NEXT: psrld $8, %xmm0
1317 ; SSE-NEXT: retq
1321 ; AVX-NEXT: vpsrld $8, %xmm0, %xmm0
1322 ; AVX-NEXT: retq
1330 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1331 ; SSE-NEXT: retq
1335 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1336 ; AVX-NEXT: retq
1349 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1350 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1351 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1352 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1353 ; SSE2-NEXT: retq
1357 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1358 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1359 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
1360 ; SSSE3-NEXT: retq
1364 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1365 ; SSE41-NEXT: pxor %xmm1, %xmm1
1366 ; SSE41-NEXT: pshufb %xmm1, %xmm0
1367 ; SSE41-NEXT: retq
1371 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1372 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1373 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1374 ; AVX1-NEXT: retq
1378 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
1379 ; AVX2-NEXT: retq
1390 ; SSE2-NEXT: movsbl (%rdi), %eax
1391 ; SSE2-NEXT: movd %eax, %xmm0
1392 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1393 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1394 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1395 ; SSE2-NEXT: retq
1399 ; SSSE3-NEXT: movsbl (%rdi), %eax
1400 ; SSSE3-NEXT: movd %eax, %xmm0
1401 ; SSSE3-NEXT: pxor %xmm1, %xmm1
1402 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
1403 ; SSSE3-NEXT: retq
1407 ; SSE41-NEXT: movsbl (%rdi), %eax
1408 ; SSE41-NEXT: movd %eax, %xmm0
1409 ; SSE41-NEXT: pxor %xmm1, %xmm1
1410 ; SSE41-NEXT: pshufb %xmm1, %xmm0
1411 ; SSE41-NEXT: retq
1415 ; AVX1-NEXT: movsbl (%rdi), %eax
1416 ; AVX1-NEXT: vmovd %eax, %xmm0
1417 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1418 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1419 ; AVX1-NEXT: retq
1423 ; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
1424 ; AVX2-NEXT: retq
1436 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1437 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1438 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1439 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1440 ; SSE2-NEXT: retq
1444 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1445 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1446 ; SSSE3-NEXT: retq
1450 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1451 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1452 ; SSE41-NEXT: retq
1456 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1457 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1458 ; AVX1-NEXT: retq
1462 ; AVX2-NEXT: vpbroadcastb 1(%rdi), %xmm0
1463 ; AVX2-NEXT: retq
1474 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1475 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1476 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
1477 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1478 ; SSE2-NEXT: retq
1482 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1483 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1484 ; SSSE3-NEXT: retq
1488 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1489 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1490 ; SSE41-NEXT: retq
1494 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1495 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1496 ; AVX1-NEXT: retq
1500 ; AVX2-NEXT: vpbroadcastb 2(%rdi), %xmm0
1501 ; AVX2-NEXT: retq
1512 ; SSE2-NEXT: movsbl (%rdi), %eax
1513 ; SSE2-NEXT: movd %eax, %xmm0
1514 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1515 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
1516 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1517 ; SSE2-NEXT: retq
1521 ; SSSE3-NEXT: movsbl (%rdi), %eax
1522 ; SSSE3-NEXT: movd %eax, %xmm0
1523 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1524 ; SSSE3-NEXT: retq
1528 ; SSE41-NEXT: movsbl (%rdi), %eax
1529 ; SSE41-NEXT: movd %eax, %xmm0
1530 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1531 ; SSE41-NEXT: retq
1535 ; AVX1-NEXT: movsbl (%rdi), %eax
1536 ; AVX1-NEXT: vmovd %eax, %xmm0
1537 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1538 ; AVX1-NEXT: retq
1542 ; AVX2-NEXT: movsbl (%rdi), %eax
1543 ; AVX2-NEXT: shrl $8, %eax
1544 ; AVX2-NEXT: vmovd %eax, %xmm0
1545 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
1546 ; AVX2-NEXT: retq
1558 ; SSE2-NEXT: movsbl (%rdi), %eax
1559 ; SSE2-NEXT: movd %eax, %xmm0
1560 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1561 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
1562 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1563 ; SSE2-NEXT: retq
1567 ; SSSE3-NEXT: movsbl (%rdi), %eax
1568 ; SSSE3-NEXT: movd %eax, %xmm0
1569 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1570 ; SSSE3-NEXT: retq
1574 ; SSE41-NEXT: movsbl (%rdi), %eax
1575 ; SSE41-NEXT: movd %eax, %xmm0
1576 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1577 ; SSE41-NEXT: retq
1581 ; AVX1-NEXT: movsbl (%rdi), %eax
1582 ; AVX1-NEXT: vmovd %eax, %xmm0
1583 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1584 ; AVX1-NEXT: retq
1588 ; AVX2-NEXT: movsbl (%rdi), %eax
1589 ; AVX2-NEXT: shrl $16, %eax
1590 ; AVX2-NEXT: vmovd %eax, %xmm0
1591 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
1592 ; AVX2-NEXT: retq