1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 3 4 target triple = "x86_64-unknown-unknown" 5 6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) { 7 ; AVX1-LABEL: shuffle_v8f32_00000000: 8 ; AVX1: # BB#0: 9 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 11 ; AVX1-NEXT: retq 12 ; 13 ; AVX2-LABEL: shuffle_v8f32_00000000: 14 ; AVX2: # BB#0: 15 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 16 ; AVX2-NEXT: retq 17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 18 ret <8 x float> %shuffle 19 } 20 21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) { 22 ; AVX1-LABEL: shuffle_v8f32_00000010: 23 ; AVX1: # BB#0: 24 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 26 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 27 ; AVX1-NEXT: retq 28 ; 29 ; AVX2-LABEL: shuffle_v8f32_00000010: 30 ; AVX2: # BB#0: 31 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 32 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 33 ; AVX2-NEXT: retq 34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 35 ret <8 x float> %shuffle 36 } 37 38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) { 39 ; AVX1-LABEL: shuffle_v8f32_00000200: 40 ; AVX1: # BB#0: 41 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 43 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 44 ; AVX1-NEXT: retq 45 ; 46 ; AVX2-LABEL: shuffle_v8f32_00000200: 47 ; AVX2: # BB#0: 48 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 49 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 50 ; AVX2-NEXT: retq 51 %shuffle 
= shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 52 ret <8 x float> %shuffle 53 } 54 55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) { 56 ; AVX1-LABEL: shuffle_v8f32_00003000: 57 ; AVX1: # BB#0: 58 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 60 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 61 ; AVX1-NEXT: retq 62 ; 63 ; AVX2-LABEL: shuffle_v8f32_00003000: 64 ; AVX2: # BB#0: 65 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 66 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 67 ; AVX2-NEXT: retq 68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 69 ret <8 x float> %shuffle 70 } 71 72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) { 73 ; AVX1-LABEL: shuffle_v8f32_00040000: 74 ; AVX1: # BB#0: 75 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 76 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 78 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 79 ; AVX1-NEXT: retq 80 ; 81 ; AVX2-LABEL: shuffle_v8f32_00040000: 82 ; AVX2: # BB#0: 83 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 84 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 85 ; AVX2-NEXT: retq 86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 87 ret <8 x float> %shuffle 88 } 89 90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) { 91 ; AVX1-LABEL: shuffle_v8f32_00500000: 92 ; AVX1: # BB#0: 93 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 94 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 95 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 96 ; AVX1-NEXT: retq 97 ; 98 ; AVX2-LABEL: shuffle_v8f32_00500000: 99 ; 
AVX2: # BB#0: 100 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 101 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 102 ; AVX2-NEXT: retq 103 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 104 ret <8 x float> %shuffle 105 } 106 107 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) { 108 ; AVX1-LABEL: shuffle_v8f32_06000000: 109 ; AVX1: # BB#0: 110 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 111 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 112 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 113 ; AVX1-NEXT: retq 114 ; 115 ; AVX2-LABEL: shuffle_v8f32_06000000: 116 ; AVX2: # BB#0: 117 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 118 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 119 ; AVX2-NEXT: retq 120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 121 ret <8 x float> %shuffle 122 } 123 124 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { 125 ; AVX1-LABEL: shuffle_v8f32_70000000: 126 ; AVX1: # BB#0: 127 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 128 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 129 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 130 ; AVX1-NEXT: retq 131 ; 132 ; AVX2-LABEL: shuffle_v8f32_70000000: 133 ; AVX2: # BB#0: 134 ; AVX2-NEXT: movl $7, %eax 135 ; AVX2-NEXT: vmovd %eax, %xmm1 136 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 137 ; AVX2-NEXT: retq 138 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 139 ret <8 x float> %shuffle 140 } 141 142 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) { 143 ; ALL-LABEL: shuffle_v8f32_01014545: 144 ; ALL: # BB#0: 145 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 146 ; ALL-NEXT: retq 147 %shuffle = shufflevector <8 x float> %a, 
<8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 148 ret <8 x float> %shuffle 149 } 150 151 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) { 152 ; AVX1-LABEL: shuffle_v8f32_00112233: 153 ; AVX1: # BB#0: 154 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1] 155 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3] 156 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 157 ; AVX1-NEXT: retq 158 ; 159 ; AVX2-LABEL: shuffle_v8f32_00112233: 160 ; AVX2: # BB#0: 161 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 162 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 163 ; AVX2-NEXT: retq 164 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 165 ret <8 x float> %shuffle 166 } 167 168 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) { 169 ; AVX1-LABEL: shuffle_v8f32_00001111: 170 ; AVX1: # BB#0: 171 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 172 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 173 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 174 ; AVX1-NEXT: retq 175 ; 176 ; AVX2-LABEL: shuffle_v8f32_00001111: 177 ; AVX2: # BB#0: 178 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 179 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 180 ; AVX2-NEXT: retq 181 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 182 ret <8 x float> %shuffle 183 } 184 185 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) { 186 ; ALL-LABEL: shuffle_v8f32_81a3c5e7: 187 ; ALL: # BB#0: 188 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 189 ; ALL-NEXT: retq 190 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 191 ret <8 x float> %shuffle 192 } 193 194 define <8 x float> @shuffle_v8f32_08080808(<8 x 
float> %a, <8 x float> %b) { 195 ; AVX1-LABEL: shuffle_v8f32_08080808: 196 ; AVX1: # BB#0: 197 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 198 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 199 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 200 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 201 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 202 ; AVX1-NEXT: retq 203 ; 204 ; AVX2-LABEL: shuffle_v8f32_08080808: 205 ; AVX2: # BB#0: 206 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1 207 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 208 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 209 ; AVX2-NEXT: retq 210 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 211 ret <8 x float> %shuffle 212 } 213 214 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) { 215 ; ALL-LABEL: shuffle_v8f32_08084c4c: 216 ; ALL: # BB#0: 217 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 218 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 219 ; ALL-NEXT: retq 220 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 221 ret <8 x float> %shuffle 222 } 223 224 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) { 225 ; ALL-LABEL: shuffle_v8f32_8823cc67: 226 ; ALL: # BB#0: 227 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 228 ; ALL-NEXT: retq 229 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 230 ret <8 x float> %shuffle 231 } 232 233 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) { 234 ; ALL-LABEL: shuffle_v8f32_9832dc76: 235 ; ALL: # BB#0: 236 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 237 ; 
ALL-NEXT: retq 238 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 239 ret <8 x float> %shuffle 240 } 241 242 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) { 243 ; ALL-LABEL: shuffle_v8f32_9810dc54: 244 ; ALL: # BB#0: 245 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 246 ; ALL-NEXT: retq 247 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 248 ret <8 x float> %shuffle 249 } 250 251 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) { 252 ; ALL-LABEL: shuffle_v8f32_08194c5d: 253 ; ALL: # BB#0: 254 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 255 ; ALL-NEXT: retq 256 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 257 ret <8 x float> %shuffle 258 } 259 260 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) { 261 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f: 262 ; ALL: # BB#0: 263 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 264 ; ALL-NEXT: retq 265 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 266 ret <8 x float> %shuffle 267 } 268 269 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) { 270 ; AVX1-LABEL: shuffle_v8f32_08192a3b: 271 ; AVX1: # BB#0: 272 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 273 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 274 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 275 ; AVX1-NEXT: retq 276 ; 277 ; AVX2-LABEL: shuffle_v8f32_08192a3b: 278 ; AVX2: # BB#0: 279 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 280 ; AVX2-NEXT: vpermps %ymm1, 
%ymm2, %ymm1 281 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 282 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 283 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 284 ; AVX2-NEXT: retq 285 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 286 ret <8 x float> %shuffle 287 } 288 289 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) { 290 ; AVX1-LABEL: shuffle_v8f32_08991abb: 291 ; AVX1: # BB#0: 292 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 293 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 294 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 295 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 296 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 297 ; AVX1-NEXT: retq 298 ; 299 ; AVX2-LABEL: shuffle_v8f32_08991abb: 300 ; AVX2: # BB#0: 301 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 302 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 303 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 304 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 305 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 306 ; AVX2-NEXT: retq 307 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 308 ret <8 x float> %shuffle 309 } 310 311 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) { 312 ; AVX1-LABEL: shuffle_v8f32_091b2d3f: 313 ; AVX1: # BB#0: 314 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 315 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 316 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 317 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 318 ; AVX1-NEXT: retq 319 ; 320 ; AVX2-LABEL: shuffle_v8f32_091b2d3f: 321 ; AVX2: # BB#0: 322 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 323 ; 
AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 324 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 325 ; AVX2-NEXT: retq 326 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 327 ret <8 x float> %shuffle 328 } 329 330 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) { 331 ; AVX1-LABEL: shuffle_v8f32_09ab1def: 332 ; AVX1: # BB#0: 333 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 334 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 335 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 336 ; AVX1-NEXT: retq 337 ; 338 ; AVX2-LABEL: shuffle_v8f32_09ab1def: 339 ; AVX2: # BB#0: 340 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 341 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 342 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 343 ; AVX2-NEXT: retq 344 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 345 ret <8 x float> %shuffle 346 } 347 348 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) { 349 ; ALL-LABEL: shuffle_v8f32_00014445: 350 ; ALL: # BB#0: 351 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 352 ; ALL-NEXT: retq 353 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 354 ret <8 x float> %shuffle 355 } 356 357 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) { 358 ; ALL-LABEL: shuffle_v8f32_00204464: 359 ; ALL: # BB#0: 360 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 361 ; ALL-NEXT: retq 362 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 363 ret <8 x float> %shuffle 364 } 365 366 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x 
float> %b) { 367 ; ALL-LABEL: shuffle_v8f32_03004744: 368 ; ALL: # BB#0: 369 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 370 ; ALL-NEXT: retq 371 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 372 ret <8 x float> %shuffle 373 } 374 375 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) { 376 ; ALL-LABEL: shuffle_v8f32_10005444: 377 ; ALL: # BB#0: 378 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 379 ; ALL-NEXT: retq 380 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 381 ret <8 x float> %shuffle 382 } 383 384 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) { 385 ; ALL-LABEL: shuffle_v8f32_22006644: 386 ; ALL: # BB#0: 387 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 388 ; ALL-NEXT: retq 389 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 390 ret <8 x float> %shuffle 391 } 392 393 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) { 394 ; ALL-LABEL: shuffle_v8f32_33307774: 395 ; ALL: # BB#0: 396 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 397 ; ALL-NEXT: retq 398 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 399 ret <8 x float> %shuffle 400 } 401 402 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) { 403 ; ALL-LABEL: shuffle_v8f32_32107654: 404 ; ALL: # BB#0: 405 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 406 ; ALL-NEXT: retq 407 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 408 ret <8 x float> %shuffle 409 } 410 411 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) { 412 ; ALL-LABEL: 
shuffle_v8f32_00234467: 413 ; ALL: # BB#0: 414 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 415 ; ALL-NEXT: retq 416 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 417 ret <8 x float> %shuffle 418 } 419 420 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) { 421 ; ALL-LABEL: shuffle_v8f32_00224466: 422 ; ALL: # BB#0: 423 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 424 ; ALL-NEXT: retq 425 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 426 ret <8 x float> %shuffle 427 } 428 429 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { 430 ; ALL-LABEL: shuffle_v8f32_10325476: 431 ; ALL: # BB#0: 432 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 433 ; ALL-NEXT: retq 434 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 435 ret <8 x float> %shuffle 436 } 437 438 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) { 439 ; ALL-LABEL: shuffle_v8f32_11335577: 440 ; ALL: # BB#0: 441 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 442 ; ALL-NEXT: retq 443 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 444 ret <8 x float> %shuffle 445 } 446 447 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) { 448 ; ALL-LABEL: shuffle_v8f32_10235467: 449 ; ALL: # BB#0: 450 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 451 ; ALL-NEXT: retq 452 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 453 ret <8 x float> %shuffle 454 } 455 456 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) { 457 ; ALL-LABEL: shuffle_v8f32_10225466: 458 ; ALL: 
# BB#0: 459 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 460 ; ALL-NEXT: retq 461 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 462 ret <8 x float> %shuffle 463 } 464 465 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { 466 ; ALL-LABEL: shuffle_v8f32_00015444: 467 ; ALL: # BB#0: 468 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 469 ; ALL-NEXT: retq 470 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 471 ret <8 x float> %shuffle 472 } 473 474 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { 475 ; ALL-LABEL: shuffle_v8f32_00204644: 476 ; ALL: # BB#0: 477 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 478 ; ALL-NEXT: retq 479 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 480 ret <8 x float> %shuffle 481 } 482 483 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { 484 ; ALL-LABEL: shuffle_v8f32_03004474: 485 ; ALL: # BB#0: 486 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 487 ; ALL-NEXT: retq 488 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 489 ret <8 x float> %shuffle 490 } 491 492 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { 493 ; ALL-LABEL: shuffle_v8f32_10004444: 494 ; ALL: # BB#0: 495 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 496 ; ALL-NEXT: retq 497 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 498 ret <8 x float> %shuffle 499 } 500 501 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { 502 ; ALL-LABEL: shuffle_v8f32_22006446: 503 ; ALL: # BB#0: 504 ; ALL-NEXT: vpermilps 
{{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 505 ; ALL-NEXT: retq 506 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 507 ret <8 x float> %shuffle 508 } 509 510 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { 511 ; ALL-LABEL: shuffle_v8f32_33307474: 512 ; ALL: # BB#0: 513 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 514 ; ALL-NEXT: retq 515 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 516 ret <8 x float> %shuffle 517 } 518 519 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { 520 ; ALL-LABEL: shuffle_v8f32_32104567: 521 ; ALL: # BB#0: 522 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 523 ; ALL-NEXT: retq 524 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 525 ret <8 x float> %shuffle 526 } 527 528 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { 529 ; ALL-LABEL: shuffle_v8f32_00236744: 530 ; ALL: # BB#0: 531 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 532 ; ALL-NEXT: retq 533 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 534 ret <8 x float> %shuffle 535 } 536 537 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { 538 ; ALL-LABEL: shuffle_v8f32_00226644: 539 ; ALL: # BB#0: 540 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 541 ; ALL-NEXT: retq 542 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 543 ret <8 x float> %shuffle 544 } 545 546 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { 547 ; ALL-LABEL: shuffle_v8f32_10324567: 548 ; ALL: # BB#0: 549 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[1,0,3,2,4,5,6,7] 550 ; ALL-NEXT: retq 551 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 552 ret <8 x float> %shuffle 553 } 554 555 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { 556 ; ALL-LABEL: shuffle_v8f32_11334567: 557 ; ALL: # BB#0: 558 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 559 ; ALL-NEXT: retq 560 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 561 ret <8 x float> %shuffle 562 } 563 564 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { 565 ; ALL-LABEL: shuffle_v8f32_01235467: 566 ; ALL: # BB#0: 567 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 568 ; ALL-NEXT: retq 569 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 570 ret <8 x float> %shuffle 571 } 572 573 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { 574 ; ALL-LABEL: shuffle_v8f32_01235466: 575 ; ALL: # BB#0: 576 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 577 ; ALL-NEXT: retq 578 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 579 ret <8 x float> %shuffle 580 } 581 582 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { 583 ; ALL-LABEL: shuffle_v8f32_002u6u44: 584 ; ALL: # BB#0: 585 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 586 ; ALL-NEXT: retq 587 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 588 ret <8 x float> %shuffle 589 } 590 591 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { 592 ; ALL-LABEL: shuffle_v8f32_00uu66uu: 593 ; ALL: # BB#0: 594 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 595 
; ALL-NEXT: retq 596 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 597 ret <8 x float> %shuffle 598 } 599 600 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { 601 ; ALL-LABEL: shuffle_v8f32_103245uu: 602 ; ALL: # BB#0: 603 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 604 ; ALL-NEXT: retq 605 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 606 ret <8 x float> %shuffle 607 } 608 609 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { 610 ; ALL-LABEL: shuffle_v8f32_1133uu67: 611 ; ALL: # BB#0: 612 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 613 ; ALL-NEXT: retq 614 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 615 ret <8 x float> %shuffle 616 } 617 618 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { 619 ; ALL-LABEL: shuffle_v8f32_0uu354uu: 620 ; ALL: # BB#0: 621 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 622 ; ALL-NEXT: retq 623 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 624 ret <8 x float> %shuffle 625 } 626 627 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { 628 ; ALL-LABEL: shuffle_v8f32_uuu3uu66: 629 ; ALL: # BB#0: 630 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 631 ; ALL-NEXT: retq 632 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 633 ret <8 x float> %shuffle 634 } 635 636 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) { 637 ; AVX1-LABEL: shuffle_v8f32_c348cda0: 638 ; AVX1: # BB#0: 639 ; AVX1-NEXT: vperm2f128 
{{.*#+}} ymm2 = ymm0[2,3,0,1] 640 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4] 641 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 642 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4] 643 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3] 644 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 645 ; AVX1-NEXT: retq 646 ; 647 ; AVX2-LABEL: shuffle_v8f32_c348cda0: 648 ; AVX2: # BB#0: 649 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0> 650 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 651 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u> 652 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 653 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 654 ; AVX2-NEXT: retq 655 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0> 656 ret <8 x float> %shuffle 657 } 658 659 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) { 660 ; AVX1-LABEL: shuffle_v8f32_f511235a: 661 ; AVX1: # BB#0: 662 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 663 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2] 664 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5] 665 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3] 666 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] 667 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6] 668 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 669 ; AVX1-NEXT: retq 670 ; 671 ; AVX2-LABEL: shuffle_v8f32_f511235a: 672 ; AVX2: # BB#0: 673 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2> 674 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 675 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u> 676 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 677 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 678 ; AVX2-NEXT: retq 679 %shuffle = shufflevector <8 x float> %a, <8 x 
float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10> 680 ret <8 x float> %shuffle 681 } 682 683 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) { 684 ; AVX1-LABEL: shuffle_v8f32_32103210: 685 ; AVX1: # BB#0: 686 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 687 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 688 ; AVX1-NEXT: retq 689 ; 690 ; AVX2-LABEL: shuffle_v8f32_32103210: 691 ; AVX2: # BB#0: 692 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0] 693 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 694 ; AVX2-NEXT: retq 695 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 696 ret <8 x float> %shuffle 697 } 698 699 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) { 700 ; AVX1-LABEL: shuffle_v8f32_76547654: 701 ; AVX1: # BB#0: 702 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 703 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 704 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 705 ; AVX1-NEXT: retq 706 ; 707 ; AVX2-LABEL: shuffle_v8f32_76547654: 708 ; AVX2: # BB#0: 709 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 710 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 711 ; AVX2-NEXT: retq 712 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 713 ret <8 x float> %shuffle 714 } 715 716 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) { 717 ; AVX1-LABEL: shuffle_v8f32_76543210: 718 ; AVX1: # BB#0: 719 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 720 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 721 ; AVX1-NEXT: retq 722 ; 723 ; AVX2-LABEL: shuffle_v8f32_76543210: 724 ; AVX2: # BB#0: 725 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 726 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 727 ; AVX2-NEXT: retq 728 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, 
i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 729 ret <8 x float> %shuffle 730 } 731 732 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) { 733 ; ALL-LABEL: shuffle_v8f32_3210ba98: 734 ; ALL: # BB#0: 735 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 736 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 737 ; ALL-NEXT: retq 738 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 739 ret <8 x float> %shuffle 740 } 741 742 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { 743 ; ALL-LABEL: shuffle_v8f32_3210fedc: 744 ; ALL: # BB#0: 745 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 746 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 747 ; ALL-NEXT: retq 748 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 749 ret <8 x float> %shuffle 750 } 751 752 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { 753 ; ALL-LABEL: shuffle_v8f32_7654fedc: 754 ; ALL: # BB#0: 755 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 756 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 757 ; ALL-NEXT: retq 758 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 759 ret <8 x float> %shuffle 760 } 761 762 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { 763 ; ALL-LABEL: shuffle_v8f32_fedc7654: 764 ; ALL: # BB#0: 765 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 766 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 767 ; ALL-NEXT: retq 768 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 769 ret <8 x float> %shuffle 770 } 771 772 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) { 773 ; 
AVX1-LABEL: PR21138: 774 ; AVX1: # BB#0: 775 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 776 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 777 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 778 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 779 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 780 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 781 ; AVX1-NEXT: retq 782 ; 783 ; AVX2-LABEL: PR21138: 784 ; AVX2: # BB#0: 785 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7> 786 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 787 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u> 788 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 789 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 790 ; AVX2-NEXT: retq 791 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 792 ret <8 x float> %shuffle 793 } 794 795 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) { 796 ; ALL-LABEL: shuffle_v8f32_ba987654: 797 ; ALL: # BB#0: 798 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 799 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 800 ; ALL-NEXT: retq 801 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 802 ret <8 x float> %shuffle 803 } 804 ; Selects the low half of %b reversed, then the low half of %a reversed (mask b,a,9,8,3,2,1,0). NOTE(review): expected codegen mirrors shuffle_v8f32_3210ba98 with the operands swapped; regenerate with llc to confirm. 805 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) { 806 ; ALL-LABEL: shuffle_v8f32_ba983210: 807 ; ALL: # BB#0: 808 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 809 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 810 ; ALL-NEXT: retq 811 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 812 ret <8 x float> %shuffle 813 } 814 815 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) { 816 ; ALL-LABEL: shuffle_v8f32_80u1c4u5: 817 ; ALL: # BB#0: 818 ; ALL-NEXT: vunpcklps 
{{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 819 ; ALL-NEXT: retq 820 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5> 821 ret <8 x float> %shuffle 822 } 823 824 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) { 825 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7: 826 ; ALL: # BB#0: 827 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7] 828 ; ALL-NEXT: retq 829 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> 830 ret <8 x float> %shuffle 831 } 832 833 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { 834 ; AVX1-LABEL: shuffle_v8i32_00000000: 835 ; AVX1: # BB#0: 836 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 837 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 838 ; AVX1-NEXT: retq 839 ; 840 ; AVX2-LABEL: shuffle_v8i32_00000000: 841 ; AVX2: # BB#0: 842 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 843 ; AVX2-NEXT: retq 844 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 845 ret <8 x i32> %shuffle 846 } 847 848 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) { 849 ; AVX1-LABEL: shuffle_v8i32_00000010: 850 ; AVX1: # BB#0: 851 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 852 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 853 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 854 ; AVX1-NEXT: retq 855 ; 856 ; AVX2-LABEL: shuffle_v8i32_00000010: 857 ; AVX2: # BB#0: 858 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 859 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 860 ; AVX2-NEXT: retq 861 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 862 ret <8 x i32> %shuffle 863 } 864 865 define <8 x i32> 
@shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) { 866 ; AVX1-LABEL: shuffle_v8i32_00000200: 867 ; AVX1: # BB#0: 868 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 869 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 870 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 871 ; AVX1-NEXT: retq 872 ; 873 ; AVX2-LABEL: shuffle_v8i32_00000200: 874 ; AVX2: # BB#0: 875 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 876 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 877 ; AVX2-NEXT: retq 878 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 879 ret <8 x i32> %shuffle 880 } 881 882 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) { 883 ; AVX1-LABEL: shuffle_v8i32_00003000: 884 ; AVX1: # BB#0: 885 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 886 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 887 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 888 ; AVX1-NEXT: retq 889 ; 890 ; AVX2-LABEL: shuffle_v8i32_00003000: 891 ; AVX2: # BB#0: 892 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 893 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 894 ; AVX2-NEXT: retq 895 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 896 ret <8 x i32> %shuffle 897 } 898 899 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) { 900 ; AVX1-LABEL: shuffle_v8i32_00040000: 901 ; AVX1: # BB#0: 902 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 903 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 904 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 905 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 906 ; AVX1-NEXT: retq 907 ; 908 ; AVX2-LABEL: shuffle_v8i32_00040000: 909 ; AVX2: # BB#0: 910 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 911 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 912 ; AVX2-NEXT: retq 913 %shuffle = shufflevector <8 x i32> %a, <8 x 
i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 914 ret <8 x i32> %shuffle 915 } 916 917 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) { 918 ; AVX1-LABEL: shuffle_v8i32_00500000: 919 ; AVX1: # BB#0: 920 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 921 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 922 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 923 ; AVX1-NEXT: retq 924 ; 925 ; AVX2-LABEL: shuffle_v8i32_00500000: 926 ; AVX2: # BB#0: 927 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 928 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 929 ; AVX2-NEXT: retq 930 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 931 ret <8 x i32> %shuffle 932 } 933 934 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) { 935 ; AVX1-LABEL: shuffle_v8i32_06000000: 936 ; AVX1: # BB#0: 937 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 938 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 939 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 940 ; AVX1-NEXT: retq 941 ; 942 ; AVX2-LABEL: shuffle_v8i32_06000000: 943 ; AVX2: # BB#0: 944 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 945 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 946 ; AVX2-NEXT: retq 947 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 948 ret <8 x i32> %shuffle 949 } 950 951 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { 952 ; AVX1-LABEL: shuffle_v8i32_70000000: 953 ; AVX1: # BB#0: 954 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 955 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 956 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 957 ; AVX1-NEXT: retq 958 ; 959 ; AVX2-LABEL: shuffle_v8i32_70000000: 960 ; AVX2: # BB#0: 961 ; AVX2-NEXT: movl $7, %eax 962 ; AVX2-NEXT: vmovd %eax, %xmm1 963 
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 964 ; AVX2-NEXT: retq 965 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 966 ret <8 x i32> %shuffle 967 } 968 969 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { 970 ; AVX1-LABEL: shuffle_v8i32_01014545: 971 ; AVX1: # BB#0: 972 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 973 ; AVX1-NEXT: retq 974 ; 975 ; AVX2-LABEL: shuffle_v8i32_01014545: 976 ; AVX2: # BB#0: 977 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 978 ; AVX2-NEXT: retq 979 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 980 ret <8 x i32> %shuffle 981 } 982 983 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) { 984 ; AVX1-LABEL: shuffle_v8i32_00112233: 985 ; AVX1: # BB#0: 986 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 987 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 988 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 989 ; AVX1-NEXT: retq 990 ; 991 ; AVX2-LABEL: shuffle_v8i32_00112233: 992 ; AVX2: # BB#0: 993 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 994 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 995 ; AVX2-NEXT: retq 996 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 997 ret <8 x i32> %shuffle 998 } 999 1000 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) { 1001 ; AVX1-LABEL: shuffle_v8i32_00001111: 1002 ; AVX1: # BB#0: 1003 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1004 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1005 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1006 ; AVX1-NEXT: retq 1007 ; 1008 ; AVX2-LABEL: shuffle_v8i32_00001111: 1009 ; AVX2: # BB#0: 1010 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 1011 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1012 ; AVX2-NEXT: retq 1013 %shuffle = shufflevector <8 x 
i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 1014 ret <8 x i32> %shuffle 1015 } 1016 1017 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { 1018 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7: 1019 ; AVX1: # BB#0: 1020 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1021 ; AVX1-NEXT: retq 1022 ; 1023 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7: 1024 ; AVX2: # BB#0: 1025 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1026 ; AVX2-NEXT: retq 1027 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 1028 ret <8 x i32> %shuffle 1029 } 1030 1031 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { 1032 ; AVX1-LABEL: shuffle_v8i32_08080808: 1033 ; AVX1: # BB#0: 1034 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 1035 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 1036 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1037 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1038 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1039 ; AVX1-NEXT: retq 1040 ; 1041 ; AVX2-LABEL: shuffle_v8i32_08080808: 1042 ; AVX2: # BB#0: 1043 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1 1044 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 1045 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1046 ; AVX2-NEXT: retq 1047 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 1048 ret <8 x i32> %shuffle 1049 } 1050 1051 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) { 1052 ; AVX1-LABEL: shuffle_v8i32_08084c4c: 1053 ; AVX1: # BB#0: 1054 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 1055 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 
1056 ; AVX1-NEXT: retq 1057 ; 1058 ; AVX2-LABEL: shuffle_v8i32_08084c4c: 1059 ; AVX2: # BB#0: 1060 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4] 1061 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1062 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1063 ; AVX2-NEXT: retq 1064 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 1065 ret <8 x i32> %shuffle 1066 } 1067 1068 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) { 1069 ; AVX1-LABEL: shuffle_v8i32_8823cc67: 1070 ; AVX1: # BB#0: 1071 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 1072 ; AVX1-NEXT: retq 1073 ; 1074 ; AVX2-LABEL: shuffle_v8i32_8823cc67: 1075 ; AVX2: # BB#0: 1076 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7] 1077 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1078 ; AVX2-NEXT: retq 1079 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 1080 ret <8 x i32> %shuffle 1081 } 1082 1083 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) { 1084 ; AVX1-LABEL: shuffle_v8i32_9832dc76: 1085 ; AVX1: # BB#0: 1086 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 1087 ; AVX1-NEXT: retq 1088 ; 1089 ; AVX2-LABEL: shuffle_v8i32_9832dc76: 1090 ; AVX2: # BB#0: 1091 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1092 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1093 ; AVX2-NEXT: retq 1094 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 1095 ret <8 x i32> %shuffle 1096 } 1097 1098 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { 1099 ; AVX1-LABEL: shuffle_v8i32_9810dc54: 1100 ; AVX1: # BB#0: 1101 ; AVX1-NEXT: vshufps {{.*#+}} 
ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 1102 ; AVX1-NEXT: retq 1103 ; 1104 ; AVX2-LABEL: shuffle_v8i32_9810dc54: 1105 ; AVX2: # BB#0: 1106 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4] 1107 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7] 1108 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1109 ; AVX2-NEXT: retq 1110 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 1111 ret <8 x i32> %shuffle 1112 } 1113 1114 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { 1115 ; AVX1-LABEL: shuffle_v8i32_08194c5d: 1116 ; AVX1: # BB#0: 1117 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1118 ; AVX1-NEXT: retq 1119 ; 1120 ; AVX2-LABEL: shuffle_v8i32_08194c5d: 1121 ; AVX2: # BB#0: 1122 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1123 ; AVX2-NEXT: retq 1124 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1125 ret <8 x i32> %shuffle 1126 } 1127 1128 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { 1129 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f: 1130 ; AVX1: # BB#0: 1131 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1132 ; AVX1-NEXT: retq 1133 ; 1134 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f: 1135 ; AVX2: # BB#0: 1136 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1137 ; AVX2-NEXT: retq 1138 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1139 ret <8 x i32> %shuffle 1140 } 1141 1142 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { 1143 ; AVX1-LABEL: shuffle_v8i32_08192a3b: 1144 ; AVX1: # BB#0: 1145 ; AVX1-NEXT: vunpckhps 
{{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1146 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1147 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1148 ; AVX1-NEXT: retq 1149 ; 1150 ; AVX2-LABEL: shuffle_v8i32_08192a3b: 1151 ; AVX2: # BB#0: 1152 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 1153 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1154 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1155 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1156 ; AVX2-NEXT: retq 1157 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1158 ret <8 x i32> %shuffle 1159 } 1160 1161 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { 1162 ; AVX1-LABEL: shuffle_v8i32_08991abb: 1163 ; AVX1: # BB#0: 1164 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 1165 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 1166 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1167 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 1168 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1169 ; AVX1-NEXT: retq 1170 ; 1171 ; AVX2-LABEL: shuffle_v8i32_08991abb: 1172 ; AVX2: # BB#0: 1173 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1174 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1175 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 1176 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1177 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1178 ; AVX2-NEXT: retq 1179 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 1180 ret <8 x i32> %shuffle 1181 } 1182 1183 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { 1184 ; AVX1-LABEL: shuffle_v8i32_091b2d3f: 1185 ; AVX1: # BB#0: 1186 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 1187 ; AVX1-NEXT: 
vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 1188 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1189 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1190 ; AVX1-NEXT: retq 1191 ; 1192 ; AVX2-LABEL: shuffle_v8i32_091b2d3f: 1193 ; AVX2: # BB#0: 1194 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1195 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1196 ; AVX2-NEXT: retq 1197 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1198 ret <8 x i32> %shuffle 1199 } 1200 1201 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { 1202 ; AVX1-LABEL: shuffle_v8i32_09ab1def: 1203 ; AVX1: # BB#0: 1204 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 1205 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1206 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1207 ; AVX1-NEXT: retq 1208 ; 1209 ; AVX2-LABEL: shuffle_v8i32_09ab1def: 1210 ; AVX2: # BB#0: 1211 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1212 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1213 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1214 ; AVX2-NEXT: retq 1215 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1216 ret <8 x i32> %shuffle 1217 } 1218 1219 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) { 1220 ; AVX1-LABEL: shuffle_v8i32_00014445: 1221 ; AVX1: # BB#0: 1222 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1223 ; AVX1-NEXT: retq 1224 ; 1225 ; AVX2-LABEL: shuffle_v8i32_00014445: 1226 ; AVX2: # BB#0: 1227 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1228 ; AVX2-NEXT: retq 1229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, 
i32 4, i32 5> 1230 ret <8 x i32> %shuffle 1231 } 1232 1233 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) { 1234 ; AVX1-LABEL: shuffle_v8i32_00204464: 1235 ; AVX1: # BB#0: 1236 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1237 ; AVX1-NEXT: retq 1238 ; 1239 ; AVX2-LABEL: shuffle_v8i32_00204464: 1240 ; AVX2: # BB#0: 1241 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1242 ; AVX2-NEXT: retq 1243 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 1244 ret <8 x i32> %shuffle 1245 } 1246 1247 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) { 1248 ; AVX1-LABEL: shuffle_v8i32_03004744: 1249 ; AVX1: # BB#0: 1250 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1251 ; AVX1-NEXT: retq 1252 ; 1253 ; AVX2-LABEL: shuffle_v8i32_03004744: 1254 ; AVX2: # BB#0: 1255 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1256 ; AVX2-NEXT: retq 1257 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 1258 ret <8 x i32> %shuffle 1259 } 1260 1261 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) { 1262 ; AVX1-LABEL: shuffle_v8i32_10005444: 1263 ; AVX1: # BB#0: 1264 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1265 ; AVX1-NEXT: retq 1266 ; 1267 ; AVX2-LABEL: shuffle_v8i32_10005444: 1268 ; AVX2: # BB#0: 1269 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1270 ; AVX2-NEXT: retq 1271 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 1272 ret <8 x i32> %shuffle 1273 } 1274 1275 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) { 1276 ; AVX1-LABEL: shuffle_v8i32_22006644: 1277 ; AVX1: # BB#0: 1278 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1279 ; AVX1-NEXT: retq 1280 ; 1281 ; AVX2-LABEL: shuffle_v8i32_22006644: 1282 ; 
AVX2: # BB#0: 1283 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1284 ; AVX2-NEXT: retq 1285 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 1286 ret <8 x i32> %shuffle 1287 } 1288 1289 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) { 1290 ; AVX1-LABEL: shuffle_v8i32_33307774: 1291 ; AVX1: # BB#0: 1292 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1293 ; AVX1-NEXT: retq 1294 ; 1295 ; AVX2-LABEL: shuffle_v8i32_33307774: 1296 ; AVX2: # BB#0: 1297 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1298 ; AVX2-NEXT: retq 1299 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 1300 ret <8 x i32> %shuffle 1301 } 1302 1303 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { 1304 ; AVX1-LABEL: shuffle_v8i32_32107654: 1305 ; AVX1: # BB#0: 1306 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1307 ; AVX1-NEXT: retq 1308 ; 1309 ; AVX2-LABEL: shuffle_v8i32_32107654: 1310 ; AVX2: # BB#0: 1311 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1312 ; AVX2-NEXT: retq 1313 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1314 ret <8 x i32> %shuffle 1315 } 1316 1317 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { 1318 ; AVX1-LABEL: shuffle_v8i32_00234467: 1319 ; AVX1: # BB#0: 1320 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1321 ; AVX1-NEXT: retq 1322 ; 1323 ; AVX2-LABEL: shuffle_v8i32_00234467: 1324 ; AVX2: # BB#0: 1325 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1326 ; AVX2-NEXT: retq 1327 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 1328 ret <8 x i32> %shuffle 1329 } 1330 1331 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { 
1332 ; AVX1-LABEL: shuffle_v8i32_00224466: 1333 ; AVX1: # BB#0: 1334 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1335 ; AVX1-NEXT: retq 1336 ; 1337 ; AVX2-LABEL: shuffle_v8i32_00224466: 1338 ; AVX2: # BB#0: 1339 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1340 ; AVX2-NEXT: retq 1341 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1342 ret <8 x i32> %shuffle 1343 } 1344 1345 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { 1346 ; AVX1-LABEL: shuffle_v8i32_10325476: 1347 ; AVX1: # BB#0: 1348 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1349 ; AVX1-NEXT: retq 1350 ; 1351 ; AVX2-LABEL: shuffle_v8i32_10325476: 1352 ; AVX2: # BB#0: 1353 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1354 ; AVX2-NEXT: retq 1355 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 1356 ret <8 x i32> %shuffle 1357 } 1358 1359 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { 1360 ; AVX1-LABEL: shuffle_v8i32_11335577: 1361 ; AVX1: # BB#0: 1362 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1363 ; AVX1-NEXT: retq 1364 ; 1365 ; AVX2-LABEL: shuffle_v8i32_11335577: 1366 ; AVX2: # BB#0: 1367 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1368 ; AVX2-NEXT: retq 1369 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1370 ret <8 x i32> %shuffle 1371 } 1372 1373 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) { 1374 ; AVX1-LABEL: shuffle_v8i32_10235467: 1375 ; AVX1: # BB#0: 1376 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1377 ; AVX1-NEXT: retq 1378 ; 1379 ; AVX2-LABEL: shuffle_v8i32_10235467: 1380 ; AVX2: # BB#0: 1381 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1382 ; AVX2-NEXT: retq 1383 %shuffle = shufflevector <8 
x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1384 ret <8 x i32> %shuffle 1385 } 1386 1387 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { 1388 ; AVX1-LABEL: shuffle_v8i32_10225466: 1389 ; AVX1: # BB#0: 1390 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1391 ; AVX1-NEXT: retq 1392 ; 1393 ; AVX2-LABEL: shuffle_v8i32_10225466: 1394 ; AVX2: # BB#0: 1395 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1396 ; AVX2-NEXT: retq 1397 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 1398 ret <8 x i32> %shuffle 1399 } 1400 1401 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { 1402 ; AVX1-LABEL: shuffle_v8i32_00015444: 1403 ; AVX1: # BB#0: 1404 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 1405 ; AVX1-NEXT: retq 1406 ; 1407 ; AVX2-LABEL: shuffle_v8i32_00015444: 1408 ; AVX2: # BB#0: 1409 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] 1410 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1411 ; AVX2-NEXT: retq 1412 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 1413 ret <8 x i32> %shuffle 1414 } 1415 1416 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { 1417 ; AVX1-LABEL: shuffle_v8i32_00204644: 1418 ; AVX1: # BB#0: 1419 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 1420 ; AVX1-NEXT: retq 1421 ; 1422 ; AVX2-LABEL: shuffle_v8i32_00204644: 1423 ; AVX2: # BB#0: 1424 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] 1425 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1426 ; AVX2-NEXT: retq 1427 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 1428 ret <8 x i32> %shuffle 1429 } 1430 1431 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { 1432 ; AVX1-LABEL: shuffle_v8i32_03004474: 1433 ; 
AVX1: # BB#0: 1434 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 1435 ; AVX1-NEXT: retq 1436 ; 1437 ; AVX2-LABEL: shuffle_v8i32_03004474: 1438 ; AVX2: # BB#0: 1439 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] 1440 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1441 ; AVX2-NEXT: retq 1442 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 1443 ret <8 x i32> %shuffle 1444 } 1445 1446 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { 1447 ; AVX1-LABEL: shuffle_v8i32_10004444: 1448 ; AVX1: # BB#0: 1449 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 1450 ; AVX1-NEXT: retq 1451 ; 1452 ; AVX2-LABEL: shuffle_v8i32_10004444: 1453 ; AVX2: # BB#0: 1454 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4] 1455 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1456 ; AVX2-NEXT: retq 1457 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 1458 ret <8 x i32> %shuffle 1459 } 1460 1461 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { 1462 ; AVX1-LABEL: shuffle_v8i32_22006446: 1463 ; AVX1: # BB#0: 1464 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 1465 ; AVX1-NEXT: retq 1466 ; 1467 ; AVX2-LABEL: shuffle_v8i32_22006446: 1468 ; AVX2: # BB#0: 1469 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6] 1470 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1471 ; AVX2-NEXT: retq 1472 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 1473 ret <8 x i32> %shuffle 1474 } 1475 1476 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { 1477 ; AVX1-LABEL: shuffle_v8i32_33307474: 1478 ; AVX1: # BB#0: 1479 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 1480 ; AVX1-NEXT: retq 1481 ; 1482 ; AVX2-LABEL: shuffle_v8i32_33307474: 1483 ; AVX2: # BB#0: 1484 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[3,3,3,0,7,4,7,4] 1485 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1486 ; AVX2-NEXT: retq 1487 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 1488 ret <8 x i32> %shuffle 1489 } 1490 1491 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { 1492 ; AVX1-LABEL: shuffle_v8i32_32104567: 1493 ; AVX1: # BB#0: 1494 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 1495 ; AVX1-NEXT: retq 1496 ; 1497 ; AVX2-LABEL: shuffle_v8i32_32104567: 1498 ; AVX2: # BB#0: 1499 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7] 1500 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1501 ; AVX2-NEXT: retq 1502 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 1503 ret <8 x i32> %shuffle 1504 } 1505 1506 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { 1507 ; AVX1-LABEL: shuffle_v8i32_00236744: 1508 ; AVX1: # BB#0: 1509 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 1510 ; AVX1-NEXT: retq 1511 ; 1512 ; AVX2-LABEL: shuffle_v8i32_00236744: 1513 ; AVX2: # BB#0: 1514 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4] 1515 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1516 ; AVX2-NEXT: retq 1517 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 1518 ret <8 x i32> %shuffle 1519 } 1520 1521 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { 1522 ; AVX1-LABEL: shuffle_v8i32_00226644: 1523 ; AVX1: # BB#0: 1524 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 1525 ; AVX1-NEXT: retq 1526 ; 1527 ; AVX2-LABEL: shuffle_v8i32_00226644: 1528 ; AVX2: # BB#0: 1529 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4] 1530 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1531 ; AVX2-NEXT: retq 1532 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 1533 ret 
<8 x i32> %shuffle 1534 } 1535 1536 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { 1537 ; AVX1-LABEL: shuffle_v8i32_10324567: 1538 ; AVX1: # BB#0: 1539 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 1540 ; AVX1-NEXT: retq 1541 ; 1542 ; AVX2-LABEL: shuffle_v8i32_10324567: 1543 ; AVX2: # BB#0: 1544 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7] 1545 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1546 ; AVX2-NEXT: retq 1547 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1548 ret <8 x i32> %shuffle 1549 } 1550 1551 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { 1552 ; AVX1-LABEL: shuffle_v8i32_11334567: 1553 ; AVX1: # BB#0: 1554 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 1555 ; AVX1-NEXT: retq 1556 ; 1557 ; AVX2-LABEL: shuffle_v8i32_11334567: 1558 ; AVX2: # BB#0: 1559 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7] 1560 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1561 ; AVX2-NEXT: retq 1562 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 1563 ret <8 x i32> %shuffle 1564 } 1565 1566 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { 1567 ; AVX1-LABEL: shuffle_v8i32_01235467: 1568 ; AVX1: # BB#0: 1569 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 1570 ; AVX1-NEXT: retq 1571 ; 1572 ; AVX2-LABEL: shuffle_v8i32_01235467: 1573 ; AVX2: # BB#0: 1574 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7] 1575 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1576 ; AVX2-NEXT: retq 1577 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1578 ret <8 x i32> %shuffle 1579 } 1580 1581 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { 1582 ; AVX1-LABEL: shuffle_v8i32_01235466: 1583 ; AVX1: # BB#0: 1584 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[0,1,2,3,5,4,6,6] 1585 ; AVX1-NEXT: retq 1586 ; 1587 ; AVX2-LABEL: shuffle_v8i32_01235466: 1588 ; AVX2: # BB#0: 1589 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6] 1590 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1591 ; AVX2-NEXT: retq 1592 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 1593 ret <8 x i32> %shuffle 1594 } 1595 1596 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { 1597 ; AVX1-LABEL: shuffle_v8i32_002u6u44: 1598 ; AVX1: # BB#0: 1599 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 1600 ; AVX1-NEXT: retq 1601 ; 1602 ; AVX2-LABEL: shuffle_v8i32_002u6u44: 1603 ; AVX2: # BB#0: 1604 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4> 1605 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1606 ; AVX2-NEXT: retq 1607 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 1608 ret <8 x i32> %shuffle 1609 } 1610 1611 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { 1612 ; AVX1-LABEL: shuffle_v8i32_00uu66uu: 1613 ; AVX1: # BB#0: 1614 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 1615 ; AVX1-NEXT: retq 1616 ; 1617 ; AVX2-LABEL: shuffle_v8i32_00uu66uu: 1618 ; AVX2: # BB#0: 1619 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u> 1620 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1621 ; AVX2-NEXT: retq 1622 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 1623 ret <8 x i32> %shuffle 1624 } 1625 1626 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { 1627 ; AVX1-LABEL: shuffle_v8i32_103245uu: 1628 ; AVX1: # BB#0: 1629 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 1630 ; AVX1-NEXT: retq 1631 ; 1632 ; AVX2-LABEL: shuffle_v8i32_103245uu: 1633 ; AVX2: # BB#0: 1634 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u> 1635 ; AVX2-NEXT: 
vpermd %ymm0, %ymm1, %ymm0 1636 ; AVX2-NEXT: retq 1637 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 1638 ret <8 x i32> %shuffle 1639 } 1640 1641 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { 1642 ; AVX1-LABEL: shuffle_v8i32_1133uu67: 1643 ; AVX1: # BB#0: 1644 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 1645 ; AVX1-NEXT: retq 1646 ; 1647 ; AVX2-LABEL: shuffle_v8i32_1133uu67: 1648 ; AVX2: # BB#0: 1649 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7> 1650 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1651 ; AVX2-NEXT: retq 1652 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 1653 ret <8 x i32> %shuffle 1654 } 1655 1656 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { 1657 ; AVX1-LABEL: shuffle_v8i32_0uu354uu: 1658 ; AVX1: # BB#0: 1659 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 1660 ; AVX1-NEXT: retq 1661 ; 1662 ; AVX2-LABEL: shuffle_v8i32_0uu354uu: 1663 ; AVX2: # BB#0: 1664 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u> 1665 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1666 ; AVX2-NEXT: retq 1667 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 1668 ret <8 x i32> %shuffle 1669 } 1670 1671 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { 1672 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66: 1673 ; AVX1: # BB#0: 1674 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 1675 ; AVX1-NEXT: retq 1676 ; 1677 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66: 1678 ; AVX2: # BB#0: 1679 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6> 1680 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1681 ; AVX2-NEXT: retq 1682 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, 
i32 6> 1683 ret <8 x i32> %shuffle 1684 } 1685 1686 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) { 1687 ; AVX1-LABEL: shuffle_v8i32_6caa87e5: 1688 ; AVX1: # BB#0: 1689 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 1690 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6] 1691 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1692 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1693 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1694 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1695 ; AVX1-NEXT: retq 1696 ; 1697 ; AVX2-LABEL: shuffle_v8i32_6caa87e5: 1698 ; AVX2: # BB#0: 1699 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u> 1700 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1701 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2] 1702 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1703 ; AVX2-NEXT: retq 1704 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5> 1705 ret <8 x i32> %shuffle 1706 } 1707 1708 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) { 1709 ; AVX1-LABEL: shuffle_v8i32_32103210: 1710 ; AVX1: # BB#0: 1711 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1712 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1713 ; AVX1-NEXT: retq 1714 ; 1715 ; AVX2-LABEL: shuffle_v8i32_32103210: 1716 ; AVX2: # BB#0: 1717 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0] 1718 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1719 ; AVX2-NEXT: retq 1720 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 1721 ret <8 x i32> %shuffle 1722 } 1723 1724 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) { 1725 ; AVX1-LABEL: shuffle_v8i32_76547654: 1726 ; AVX1: # BB#0: 1727 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1728 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1729 ; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1730 ; AVX1-NEXT: retq 1731 ; 1732 ; AVX2-LABEL: shuffle_v8i32_76547654: 1733 ; AVX2: # BB#0: 1734 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 1735 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1736 ; AVX2-NEXT: retq 1737 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 1738 ret <8 x i32> %shuffle 1739 } 1740 1741 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) { 1742 ; AVX1-LABEL: shuffle_v8i32_76543210: 1743 ; AVX1: # BB#0: 1744 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 1745 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1746 ; AVX1-NEXT: retq 1747 ; 1748 ; AVX2-LABEL: shuffle_v8i32_76543210: 1749 ; AVX2: # BB#0: 1750 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 1751 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1752 ; AVX2-NEXT: retq 1753 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1754 ret <8 x i32> %shuffle 1755 } 1756 1757 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) { 1758 ; AVX1-LABEL: shuffle_v8i32_3210ba98: 1759 ; AVX1: # BB#0: 1760 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1761 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1762 ; AVX1-NEXT: retq 1763 ; 1764 ; AVX2-LABEL: shuffle_v8i32_3210ba98: 1765 ; AVX2: # BB#0: 1766 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1767 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1768 ; AVX2-NEXT: retq 1769 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 1770 ret <8 x i32> %shuffle 1771 } 1772 1773 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) { 1774 ; AVX1-LABEL: shuffle_v8i32_3210fedc: 1775 ; AVX1: # BB#0: 1776 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 1777 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[3,2,1,0,7,6,5,4] 1778 ; AVX1-NEXT: retq 1779 ; 1780 ; AVX2-LABEL: shuffle_v8i32_3210fedc: 1781 ; AVX2: # BB#0: 1782 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1783 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1784 ; AVX2-NEXT: retq 1785 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 1786 ret <8 x i32> %shuffle 1787 } 1788 1789 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { 1790 ; AVX1-LABEL: shuffle_v8i32_7654fedc: 1791 ; AVX1: # BB#0: 1792 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1793 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1794 ; AVX1-NEXT: retq 1795 ; 1796 ; AVX2-LABEL: shuffle_v8i32_7654fedc: 1797 ; AVX2: # BB#0: 1798 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1799 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1800 ; AVX2-NEXT: retq 1801 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 1802 ret <8 x i32> %shuffle 1803 } 1804 1805 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) { 1806 ; AVX1-LABEL: shuffle_v8i32_fedc7654: 1807 ; AVX1: # BB#0: 1808 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 1809 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1810 ; AVX1-NEXT: retq 1811 ; 1812 ; AVX2-LABEL: shuffle_v8i32_fedc7654: 1813 ; AVX2: # BB#0: 1814 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 1815 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1816 ; AVX2-NEXT: retq 1817 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 1818 ret <8 x i32> %shuffle 1819 } 1820 1821 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) { 1822 ; AVX1-LABEL: shuffle_v8i32_ba987654: 1823 ; AVX1: # BB#0: 1824 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = 
ymm1[0,1],ymm0[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba987654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

; Two-input shuffle whose name encodes the mask in hex: b,a,9,8,3,2,1,0.
; Result elements 0-3 are %b[3],%b[2],%b[1],%b[0] (indices 11,10,9,8) and
; elements 4-7 are %a[3],%a[2],%a[1],%a[0] (indices 3,2,1,0).
;
; The mask previously repeated <11,10,9,8,7,6,5,4> from the ba987654 test
; above, so this function merely duplicated that test instead of covering
; the "low half of %b, then low half of %a" case its name describes.
;
; This is shuffle_v8i32_3210ba98 with the two operands swapped, so the
; expected code inserts the low 128 bits of %a above the low 128 bits of %b
; and then reverses the elements inside each 128-bit lane.
; NOTE(review): the expected lines below were derived by mirroring
; shuffle_v8i32_3210ba98; regenerate them with llc to confirm.
define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_ba983210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba983210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

; Shuffle of a zero vector (first operand) with %a (second operand).
; Elements 0 and 4 read element 0 of the zero vector, elements 3 and 7 read
; %a[0] and %a[4] (indices 8 and 12), and the remaining elements are undef.
; With AVX2 the expected code is a single per-lane byte shift that moves the
; lowest dword of each 128-bit lane to the top and fills the rest with zero.
define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32>
@shuffle_v8i32_9ubzdefz(<8 x i32> %a) { 1869 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz: 1870 ; AVX1: # BB#0: 1871 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1872 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4] 1873 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 1874 ; AVX1-NEXT: retq 1875 ; 1876 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz: 1877 ; AVX2: # BB#0: 1878 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero 1879 ; AVX2-NEXT: retq 1880 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0> 1881 ret <8 x i32> %shuffle 1882 } 1883 1884 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { 1885 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu: 1886 ; AVX1: # BB#0: 1887 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1888 ; AVX1-NEXT: retq 1889 ; 1890 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu: 1891 ; AVX2: # BB#0: 1892 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1893 ; AVX2-NEXT: retq 1894 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef> 1895 ret <8 x i32> %shuffle 1896 } 1897 1898 define <8 x float> @splat_mem_v8f32_2(float* %p) { 1899 ; ALL-LABEL: splat_mem_v8f32_2: 1900 ; ALL: # BB#0: 1901 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0 1902 ; ALL-NEXT: retq 1903 %1 = load float, float* %p 1904 %2 = insertelement <4 x float> undef, float %1, i32 0 1905 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer 1906 ret <8 x float> %3 1907 } 1908 1909 define <8 x float> @splat_v8f32(<4 x float> %r) { 1910 ; AVX1-LABEL: splat_v8f32: 1911 ; AVX1: # BB#0: 1912 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 1913 ; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1914 ; AVX1-NEXT: retq 1915 ; 1916 ; AVX2-LABEL: splat_v8f32: 1917 ; AVX2: # BB#0: 1918 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 1919 ; AVX2-NEXT: retq 1920 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer 1921 ret <8 x float> %1 1922 } 1923 1924 ; 1925 ; Shuffle to logical bit shifts 1926 ; 1927 1928 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) { 1929 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6: 1930 ; AVX1: # BB#0: 1931 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1932 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 1933 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5] 1934 ; AVX1-NEXT: retq 1935 ; 1936 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6: 1937 ; AVX2: # BB#0: 1938 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 1939 ; AVX2-NEXT: retq 1940 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6> 1941 ret <8 x i32> %shuffle 1942 } 1943 1944 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) { 1945 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU: 1946 ; AVX1: # BB#0: 1947 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1948 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 1949 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1950 ; AVX1-NEXT: retq 1951 ; 1952 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU: 1953 ; AVX2: # BB#0: 1954 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 1955 ; AVX2-NEXT: retq 1956 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef> 1957 ret <8 x i32> %shuffle 1958 } 1959 1960 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) { 1961 ; AVX1-LABEL: shuffle_v8i32_B012F456: 1962 ; AVX1: # BB#0: 1963 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4] 1964 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = 
ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6] 1965 ; AVX1-NEXT: retq 1966 ; 1967 ; AVX2-LABEL: shuffle_v8i32_B012F456: 1968 ; AVX2: # BB#0: 1969 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27] 1970 ; AVX2-NEXT: retq 1971 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6> 1972 ret <8 x i32> %shuffle 1973 } 1974 1975 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) { 1976 ; AVX1-LABEL: shuffle_v8i32_1238567C: 1977 ; AVX1: # BB#0: 1978 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4] 1979 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 1980 ; AVX1-NEXT: retq 1981 ; 1982 ; AVX2-LABEL: shuffle_v8i32_1238567C: 1983 ; AVX2: # BB#0: 1984 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19] 1985 ; AVX2-NEXT: retq 1986 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12> 1987 ret <8 x i32> %shuffle 1988 } 1989 1990 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) { 1991 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4: 1992 ; AVX1: # BB#0: 1993 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4] 1994 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4] 1995 ; AVX1-NEXT: retq 1996 ; 1997 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4: 1998 ; AVX2: # BB#0: 1999 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19] 2000 ; AVX2-NEXT: retq 2001 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4> 2002 ret <8 x i32> %shuffle 2003 } 2004 2005 define <8 x i32> 
@shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) { 2006 ; AVX1-LABEL: shuffle_v8i32_389A7CDE: 2007 ; AVX1: # BB#0: 2008 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4] 2009 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6] 2010 ; AVX1-NEXT: retq 2011 ; 2012 ; AVX2-LABEL: shuffle_v8i32_389A7CDE: 2013 ; AVX2: # BB#0: 2014 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27] 2015 ; AVX2-NEXT: retq 2016 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14> 2017 ret <8 x i32> %shuffle 2018 } 2019 2020 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) { 2021 ; AVX1-LABEL: shuffle_v8i32_30127456: 2022 ; AVX1: # BB#0: 2023 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2024 ; AVX1-NEXT: retq 2025 ; 2026 ; AVX2-LABEL: shuffle_v8i32_30127456: 2027 ; AVX2: # BB#0: 2028 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2029 ; AVX2-NEXT: retq 2030 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6> 2031 ret <8 x i32> %shuffle 2032 } 2033 2034 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) { 2035 ; AVX1-LABEL: shuffle_v8i32_12305674: 2036 ; AVX1: # BB#0: 2037 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4] 2038 ; AVX1-NEXT: retq 2039 ; 2040 ; AVX2-LABEL: shuffle_v8i32_12305674: 2041 ; AVX2: # BB#0: 2042 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4] 2043 ; AVX2-NEXT: retq 2044 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4> 2045 ret <8 x i32> %shuffle 2046 } 2047 2048 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2049 ; ALL-LABEL: concat_v2f32_1: 2050 ; ALL: # BB#0: # %entry 2051 ; ALL-NEXT: vmovq 
{{.*#+}} xmm0 = mem[0],zero 2052 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2053 ; ALL-NEXT: retq 2054 entry: 2055 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2056 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2057 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2058 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2059 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef> 2060 ret <8 x float> %tmp76 2061 } 2062 2063 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2064 ; ALL-LABEL: concat_v2f32_2: 2065 ; ALL: # BB#0: # %entry 2066 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2067 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2068 ; ALL-NEXT: retq 2069 entry: 2070 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2071 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2072 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2073 ret <8 x float> %tmp76 2074 } 2075 2076 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2077 ; ALL-LABEL: concat_v2f32_3: 2078 ; ALL: # BB#0: # %entry 2079 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2080 ; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2081 ; ALL-NEXT: retq 2082 entry: 2083 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2084 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2085 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2086 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2087 ret <8 x 
float> %res 2088 } 2089 2090 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) { 2091 ; AVX1-LABEL: insert_mem_and_zero_v8i32: 2092 ; AVX1: # BB#0: 2093 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2094 ; AVX1-NEXT: retq 2095 ; 2096 ; AVX2-LABEL: insert_mem_and_zero_v8i32: 2097 ; AVX2: # BB#0: 2098 ; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2099 ; AVX2-NEXT: retq 2100 %a = load i32, i32* %ptr 2101 %v = insertelement <8 x i32> undef, i32 %a, i32 0 2102 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2103 ret <8 x i32> %shuffle 2104 } 2105 2106