1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 4 5 target triple = "x86_64-unknown-unknown" 6 7 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) { 8 ; AVX1-LABEL: shuffle_v8f32_00000000: 9 ; AVX1: # BB#0: 10 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 11 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 12 ; AVX1-NEXT: retq 13 ; 14 ; AVX2-LABEL: shuffle_v8f32_00000000: 15 ; AVX2: # BB#0: 16 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 17 ; AVX2-NEXT: retq 18 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19 ret <8 x float> %shuffle 20 } 21 22 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) { 23 ; AVX1-LABEL: shuffle_v8f32_00000010: 24 ; AVX1: # BB#0: 25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 26 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 27 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 28 ; AVX1-NEXT: retq 29 ; 30 ; AVX2-LABEL: shuffle_v8f32_00000010: 31 ; AVX2: # BB#0: 32 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 33 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 34 ; AVX2-NEXT: retq 35 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 36 ret <8 x float> %shuffle 37 } 38 39 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) { 40 ; AVX1-LABEL: shuffle_v8f32_00000200: 41 ; AVX1: # BB#0: 42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 43 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 44 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 45 ; AVX1-NEXT: retq 46 ; 47 ; AVX2-LABEL: shuffle_v8f32_00000200: 48 ; AVX2: # BB#0: 49 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = 
[0,0,0,0,0,2,0,0] 50 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 51 ; AVX2-NEXT: retq 52 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 53 ret <8 x float> %shuffle 54 } 55 56 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) { 57 ; AVX1-LABEL: shuffle_v8f32_00003000: 58 ; AVX1: # BB#0: 59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 60 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 61 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 62 ; AVX1-NEXT: retq 63 ; 64 ; AVX2-LABEL: shuffle_v8f32_00003000: 65 ; AVX2: # BB#0: 66 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 67 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 68 ; AVX2-NEXT: retq 69 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 70 ret <8 x float> %shuffle 71 } 72 73 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) { 74 ; AVX1-LABEL: shuffle_v8f32_00040000: 75 ; AVX1: # BB#0: 76 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 77 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 78 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 79 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 80 ; AVX1-NEXT: retq 81 ; 82 ; AVX2-LABEL: shuffle_v8f32_00040000: 83 ; AVX2: # BB#0: 84 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 85 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 86 ; AVX2-NEXT: retq 87 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 88 ret <8 x float> %shuffle 89 } 90 91 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) { 92 ; AVX1-LABEL: shuffle_v8f32_00500000: 93 ; AVX1: # BB#0: 94 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 95 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 96 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[0,0,1,0,4,4,4,4] 97 ; AVX1-NEXT: retq 98 ; 99 ; AVX2-LABEL: shuffle_v8f32_00500000: 100 ; AVX2: # BB#0: 101 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 102 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 103 ; AVX2-NEXT: retq 104 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 105 ret <8 x float> %shuffle 106 } 107 108 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) { 109 ; AVX1-LABEL: shuffle_v8f32_06000000: 110 ; AVX1: # BB#0: 111 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 112 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 113 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 114 ; AVX1-NEXT: retq 115 ; 116 ; AVX2-LABEL: shuffle_v8f32_06000000: 117 ; AVX2: # BB#0: 118 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 119 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 120 ; AVX2-NEXT: retq 121 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 122 ret <8 x float> %shuffle 123 } 124 125 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { 126 ; AVX1-LABEL: shuffle_v8f32_70000000: 127 ; AVX1: # BB#0: 128 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 129 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 130 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 131 ; AVX1-NEXT: retq 132 ; 133 ; AVX2-LABEL: shuffle_v8f32_70000000: 134 ; AVX2: # BB#0: 135 ; AVX2-NEXT: movl $7, %eax 136 ; AVX2-NEXT: vmovd %eax, %xmm1 137 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 138 ; AVX2-NEXT: retq 139 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 140 ret <8 x float> %shuffle 141 } 142 143 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) { 144 ; ALL-LABEL: shuffle_v8f32_01014545: 145 ; ALL: # BB#0: 146 ; ALL-NEXT: vmovddup 
{{.*#+}} ymm0 = ymm0[0,0,2,2] 147 ; ALL-NEXT: retq 148 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 149 ret <8 x float> %shuffle 150 } 151 152 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) { 153 ; AVX1-LABEL: shuffle_v8f32_00112233: 154 ; AVX1: # BB#0: 155 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1] 156 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3] 157 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 158 ; AVX1-NEXT: retq 159 ; 160 ; AVX2-LABEL: shuffle_v8f32_00112233: 161 ; AVX2: # BB#0: 162 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 163 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 164 ; AVX2-NEXT: retq 165 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 166 ret <8 x float> %shuffle 167 } 168 169 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) { 170 ; AVX1-LABEL: shuffle_v8f32_00001111: 171 ; AVX1: # BB#0: 172 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 173 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 174 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 175 ; AVX1-NEXT: retq 176 ; 177 ; AVX2-LABEL: shuffle_v8f32_00001111: 178 ; AVX2: # BB#0: 179 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 180 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 181 ; AVX2-NEXT: retq 182 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 183 ret <8 x float> %shuffle 184 } 185 186 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) { 187 ; ALL-LABEL: shuffle_v8f32_81a3c5e7: 188 ; ALL: # BB#0: 189 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 190 ; ALL-NEXT: retq 191 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 
7> 192 ret <8 x float> %shuffle 193 } 194 195 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) { 196 ; AVX1-LABEL: shuffle_v8f32_08080808: 197 ; AVX1: # BB#0: 198 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 199 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 200 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 201 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 202 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 203 ; AVX1-NEXT: retq 204 ; 205 ; AVX2-LABEL: shuffle_v8f32_08080808: 206 ; AVX2: # BB#0: 207 ; AVX2-NEXT: vbroadcastss %xmm1, %ymm1 208 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 209 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 210 ; AVX2-NEXT: retq 211 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 212 ret <8 x float> %shuffle 213 } 214 215 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) { 216 ; ALL-LABEL: shuffle_v8f32_08084c4c: 217 ; ALL: # BB#0: 218 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 219 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 220 ; ALL-NEXT: retq 221 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 222 ret <8 x float> %shuffle 223 } 224 225 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) { 226 ; ALL-LABEL: shuffle_v8f32_8823cc67: 227 ; ALL: # BB#0: 228 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 229 ; ALL-NEXT: retq 230 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 231 ret <8 x float> %shuffle 232 } 233 234 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) { 235 ; ALL-LABEL: shuffle_v8f32_9832dc76: 236 ; ALL: # 
BB#0: 237 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 238 ; ALL-NEXT: retq 239 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 240 ret <8 x float> %shuffle 241 } 242 243 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) { 244 ; ALL-LABEL: shuffle_v8f32_9810dc54: 245 ; ALL: # BB#0: 246 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 247 ; ALL-NEXT: retq 248 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 249 ret <8 x float> %shuffle 250 } 251 252 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) { 253 ; ALL-LABEL: shuffle_v8f32_08194c5d: 254 ; ALL: # BB#0: 255 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 256 ; ALL-NEXT: retq 257 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 258 ret <8 x float> %shuffle 259 } 260 261 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) { 262 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f: 263 ; ALL: # BB#0: 264 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 265 ; ALL-NEXT: retq 266 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 267 ret <8 x float> %shuffle 268 } 269 270 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) { 271 ; AVX1-LABEL: shuffle_v8f32_08192a3b: 272 ; AVX1: # BB#0: 273 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 274 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 275 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 276 ; AVX1-NEXT: retq 277 ; 278 ; AVX2-LABEL: shuffle_v8f32_08192a3b: 279 ; AVX2: # BB#0: 
280 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 281 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 282 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 283 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 284 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 285 ; AVX2-NEXT: retq 286 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 287 ret <8 x float> %shuffle 288 } 289 290 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) { 291 ; AVX1-LABEL: shuffle_v8f32_08991abb: 292 ; AVX1: # BB#0: 293 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 294 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 295 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 296 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 297 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 298 ; AVX1-NEXT: retq 299 ; 300 ; AVX2-LABEL: shuffle_v8f32_08991abb: 301 ; AVX2: # BB#0: 302 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 303 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 304 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 305 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 306 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 307 ; AVX2-NEXT: retq 308 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 309 ret <8 x float> %shuffle 310 } 311 312 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) { 313 ; AVX1-LABEL: shuffle_v8f32_091b2d3f: 314 ; AVX1: # BB#0: 315 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 316 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 317 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 318 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 319 ; AVX1-NEXT: retq 320 ; 321 ; AVX2-LABEL: shuffle_v8f32_091b2d3f: 322 
; AVX2: # BB#0: 323 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 324 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 325 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 326 ; AVX2-NEXT: retq 327 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 328 ret <8 x float> %shuffle 329 } 330 331 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) { 332 ; AVX1-LABEL: shuffle_v8f32_09ab1def: 333 ; AVX1: # BB#0: 334 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 335 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 336 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 337 ; AVX1-NEXT: retq 338 ; 339 ; AVX2-LABEL: shuffle_v8f32_09ab1def: 340 ; AVX2: # BB#0: 341 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 342 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 343 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 344 ; AVX2-NEXT: retq 345 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 346 ret <8 x float> %shuffle 347 } 348 349 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) { 350 ; ALL-LABEL: shuffle_v8f32_00014445: 351 ; ALL: # BB#0: 352 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 353 ; ALL-NEXT: retq 354 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 355 ret <8 x float> %shuffle 356 } 357 358 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) { 359 ; ALL-LABEL: shuffle_v8f32_00204464: 360 ; ALL: # BB#0: 361 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 362 ; ALL-NEXT: retq 363 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 364 ret <8 x float> %shuffle 
365 } 366 367 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) { 368 ; ALL-LABEL: shuffle_v8f32_03004744: 369 ; ALL: # BB#0: 370 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 371 ; ALL-NEXT: retq 372 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 373 ret <8 x float> %shuffle 374 } 375 376 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) { 377 ; ALL-LABEL: shuffle_v8f32_10005444: 378 ; ALL: # BB#0: 379 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 380 ; ALL-NEXT: retq 381 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 382 ret <8 x float> %shuffle 383 } 384 385 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) { 386 ; ALL-LABEL: shuffle_v8f32_22006644: 387 ; ALL: # BB#0: 388 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 389 ; ALL-NEXT: retq 390 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 391 ret <8 x float> %shuffle 392 } 393 394 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) { 395 ; ALL-LABEL: shuffle_v8f32_33307774: 396 ; ALL: # BB#0: 397 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 398 ; ALL-NEXT: retq 399 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 400 ret <8 x float> %shuffle 401 } 402 403 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) { 404 ; ALL-LABEL: shuffle_v8f32_32107654: 405 ; ALL: # BB#0: 406 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 407 ; ALL-NEXT: retq 408 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 409 ret <8 x float> %shuffle 410 } 411 412 define <8 x float> 
@shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) { 413 ; ALL-LABEL: shuffle_v8f32_00234467: 414 ; ALL: # BB#0: 415 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 416 ; ALL-NEXT: retq 417 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 418 ret <8 x float> %shuffle 419 } 420 421 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) { 422 ; ALL-LABEL: shuffle_v8f32_00224466: 423 ; ALL: # BB#0: 424 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 425 ; ALL-NEXT: retq 426 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 427 ret <8 x float> %shuffle 428 } 429 430 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { 431 ; ALL-LABEL: shuffle_v8f32_10325476: 432 ; ALL: # BB#0: 433 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 434 ; ALL-NEXT: retq 435 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 436 ret <8 x float> %shuffle 437 } 438 439 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) { 440 ; ALL-LABEL: shuffle_v8f32_11335577: 441 ; ALL: # BB#0: 442 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 443 ; ALL-NEXT: retq 444 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 445 ret <8 x float> %shuffle 446 } 447 448 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) { 449 ; ALL-LABEL: shuffle_v8f32_10235467: 450 ; ALL: # BB#0: 451 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 452 ; ALL-NEXT: retq 453 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 454 ret <8 x float> %shuffle 455 } 456 457 define <8 x float> @shuffle_v8f32_10225466(<8 x 
float> %a, <8 x float> %b) { 458 ; ALL-LABEL: shuffle_v8f32_10225466: 459 ; ALL: # BB#0: 460 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 461 ; ALL-NEXT: retq 462 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 463 ret <8 x float> %shuffle 464 } 465 466 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { 467 ; ALL-LABEL: shuffle_v8f32_00015444: 468 ; ALL: # BB#0: 469 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 470 ; ALL-NEXT: retq 471 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 472 ret <8 x float> %shuffle 473 } 474 475 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { 476 ; ALL-LABEL: shuffle_v8f32_00204644: 477 ; ALL: # BB#0: 478 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 479 ; ALL-NEXT: retq 480 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 481 ret <8 x float> %shuffle 482 } 483 484 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { 485 ; ALL-LABEL: shuffle_v8f32_03004474: 486 ; ALL: # BB#0: 487 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 488 ; ALL-NEXT: retq 489 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 490 ret <8 x float> %shuffle 491 } 492 493 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { 494 ; ALL-LABEL: shuffle_v8f32_10004444: 495 ; ALL: # BB#0: 496 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 497 ; ALL-NEXT: retq 498 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 499 ret <8 x float> %shuffle 500 } 501 502 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { 503 ; 
ALL-LABEL: shuffle_v8f32_22006446: 504 ; ALL: # BB#0: 505 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 506 ; ALL-NEXT: retq 507 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 508 ret <8 x float> %shuffle 509 } 510 511 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { 512 ; ALL-LABEL: shuffle_v8f32_33307474: 513 ; ALL: # BB#0: 514 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 515 ; ALL-NEXT: retq 516 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 517 ret <8 x float> %shuffle 518 } 519 520 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { 521 ; ALL-LABEL: shuffle_v8f32_32104567: 522 ; ALL: # BB#0: 523 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 524 ; ALL-NEXT: retq 525 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 526 ret <8 x float> %shuffle 527 } 528 529 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { 530 ; ALL-LABEL: shuffle_v8f32_00236744: 531 ; ALL: # BB#0: 532 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 533 ; ALL-NEXT: retq 534 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 535 ret <8 x float> %shuffle 536 } 537 538 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { 539 ; ALL-LABEL: shuffle_v8f32_00226644: 540 ; ALL: # BB#0: 541 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 542 ; ALL-NEXT: retq 543 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 544 ret <8 x float> %shuffle 545 } 546 547 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { 548 ; ALL-LABEL: shuffle_v8f32_10324567: 
549 ; ALL: # BB#0: 550 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 551 ; ALL-NEXT: retq 552 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 553 ret <8 x float> %shuffle 554 } 555 556 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { 557 ; ALL-LABEL: shuffle_v8f32_11334567: 558 ; ALL: # BB#0: 559 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 560 ; ALL-NEXT: retq 561 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 562 ret <8 x float> %shuffle 563 } 564 565 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { 566 ; ALL-LABEL: shuffle_v8f32_01235467: 567 ; ALL: # BB#0: 568 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 569 ; ALL-NEXT: retq 570 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 571 ret <8 x float> %shuffle 572 } 573 574 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { 575 ; ALL-LABEL: shuffle_v8f32_01235466: 576 ; ALL: # BB#0: 577 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 578 ; ALL-NEXT: retq 579 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 580 ret <8 x float> %shuffle 581 } 582 583 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { 584 ; ALL-LABEL: shuffle_v8f32_002u6u44: 585 ; ALL: # BB#0: 586 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 587 ; ALL-NEXT: retq 588 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 589 ret <8 x float> %shuffle 590 } 591 592 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { 593 ; ALL-LABEL: shuffle_v8f32_00uu66uu: 594 ; ALL: # BB#0: 595 ; 
ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 596 ; ALL-NEXT: retq 597 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 598 ret <8 x float> %shuffle 599 } 600 601 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { 602 ; ALL-LABEL: shuffle_v8f32_103245uu: 603 ; ALL: # BB#0: 604 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 605 ; ALL-NEXT: retq 606 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 607 ret <8 x float> %shuffle 608 } 609 610 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { 611 ; ALL-LABEL: shuffle_v8f32_1133uu67: 612 ; ALL: # BB#0: 613 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 614 ; ALL-NEXT: retq 615 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 616 ret <8 x float> %shuffle 617 } 618 619 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { 620 ; ALL-LABEL: shuffle_v8f32_0uu354uu: 621 ; ALL: # BB#0: 622 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 623 ; ALL-NEXT: retq 624 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 625 ret <8 x float> %shuffle 626 } 627 628 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { 629 ; ALL-LABEL: shuffle_v8f32_uuu3uu66: 630 ; ALL: # BB#0: 631 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 632 ; ALL-NEXT: retq 633 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 634 ret <8 x float> %shuffle 635 } 636 637 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) { 638 ; AVX1-LABEL: 
shuffle_v8f32_c348cda0: 639 ; AVX1: # BB#0: 640 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 641 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4] 642 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 643 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4] 644 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3] 645 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 646 ; AVX1-NEXT: retq 647 ; 648 ; AVX2-LABEL: shuffle_v8f32_c348cda0: 649 ; AVX2: # BB#0: 650 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0> 651 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 652 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u> 653 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 654 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 655 ; AVX2-NEXT: retq 656 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0> 657 ret <8 x float> %shuffle 658 } 659 660 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) { 661 ; AVX1-LABEL: shuffle_v8f32_f511235a: 662 ; AVX1: # BB#0: 663 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 664 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2] 665 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5] 666 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3] 667 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] 668 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6] 669 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 670 ; AVX1-NEXT: retq 671 ; 672 ; AVX2-LABEL: shuffle_v8f32_f511235a: 673 ; AVX2: # BB#0: 674 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2> 675 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 676 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u> 677 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 678 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 
679 ; AVX2-NEXT: retq 680 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10> 681 ret <8 x float> %shuffle 682 } 683 684 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) { 685 ; AVX1-LABEL: shuffle_v8f32_32103210: 686 ; AVX1: # BB#0: 687 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 688 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 689 ; AVX1-NEXT: retq 690 ; 691 ; AVX2-LABEL: shuffle_v8f32_32103210: 692 ; AVX2: # BB#0: 693 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0] 694 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 695 ; AVX2-NEXT: retq 696 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 697 ret <8 x float> %shuffle 698 } 699 700 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) { 701 ; AVX1-LABEL: shuffle_v8f32_76547654: 702 ; AVX1: # BB#0: 703 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 704 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 705 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 706 ; AVX1-NEXT: retq 707 ; 708 ; AVX2-LABEL: shuffle_v8f32_76547654: 709 ; AVX2: # BB#0: 710 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 711 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 712 ; AVX2-NEXT: retq 713 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 714 ret <8 x float> %shuffle 715 } 716 717 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) { 718 ; AVX1-LABEL: shuffle_v8f32_76543210: 719 ; AVX1: # BB#0: 720 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 721 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 722 ; AVX1-NEXT: retq 723 ; 724 ; AVX2-LABEL: shuffle_v8f32_76543210: 725 ; AVX2: # BB#0: 726 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 727 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 728 ; AVX2-NEXT: retq 729 
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 730 ret <8 x float> %shuffle 731 } 732 733 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) { 734 ; ALL-LABEL: shuffle_v8f32_3210ba98: 735 ; ALL: # BB#0: 736 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 737 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 738 ; ALL-NEXT: retq 739 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 740 ret <8 x float> %shuffle 741 } 742 743 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { 744 ; ALL-LABEL: shuffle_v8f32_3210fedc: 745 ; ALL: # BB#0: 746 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 747 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 748 ; ALL-NEXT: retq 749 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 750 ret <8 x float> %shuffle 751 } 752 753 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { 754 ; ALL-LABEL: shuffle_v8f32_7654fedc: 755 ; ALL: # BB#0: 756 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 757 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 758 ; ALL-NEXT: retq 759 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 760 ret <8 x float> %shuffle 761 } 762 763 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { 764 ; ALL-LABEL: shuffle_v8f32_fedc7654: 765 ; ALL: # BB#0: 766 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 767 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 768 ; ALL-NEXT: retq 769 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 770 ret <8 x float> %shuffle 771 } 772 773 define 
<8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) { 774 ; AVX1-LABEL: PR21138: 775 ; AVX1: # BB#0: 776 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 777 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 778 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 779 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 780 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 781 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 782 ; AVX1-NEXT: retq 783 ; 784 ; AVX2-LABEL: PR21138: 785 ; AVX2: # BB#0: 786 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7> 787 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 788 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u> 789 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 790 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 791 ; AVX2-NEXT: retq 792 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 793 ret <8 x float> %shuffle 794 } 795 796 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) { 797 ; ALL-LABEL: shuffle_v8f32_ba987654: 798 ; ALL: # BB#0: 799 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 800 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 801 ; ALL-NEXT: retq 802 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 803 ret <8 x float> %shuffle 804 } 805 ; Result lanes 0-3 are %b[3,2,1,0] and lanes 4-7 are %a[3,2,1,0] (mask indices 8-15 select from %b). NOTE(review): the ALL-NEXT lines below were derived by symmetry with shuffle_v8f32_3210ba98; re-run utils/update_llc_test_checks.py to confirm. 806 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) { 807 ; ALL-LABEL: shuffle_v8f32_ba983210: 808 ; ALL: # BB#0: 809 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 810 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 811 ; ALL-NEXT: retq 812 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 813 ret <8 x float> %shuffle 814 } 815 816 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) { 817 ; ALL-LABEL: 
shuffle_v8f32_80u1c4u5: 818 ; ALL: # BB#0: 819 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 820 ; ALL-NEXT: retq 821 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5> 822 ret <8 x float> %shuffle 823 } 824 825 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) { 826 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7: 827 ; ALL: # BB#0: 828 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7] 829 ; ALL-NEXT: retq 830 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> 831 ret <8 x float> %shuffle 832 } 833 834 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) { 835 ; AVX1-LABEL: shuffle_v8f32_uuuu1111: 836 ; AVX1: # BB#0: 837 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 838 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 839 ; AVX1-NEXT: retq 840 ; 841 ; AVX2-LABEL: shuffle_v8f32_uuuu1111: 842 ; AVX2: # BB#0: 843 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 844 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 845 ; AVX2-NEXT: retq 846 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 847 ret <8 x float> %shuffle 848 } 849 850 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) { 851 ; AVX1-LABEL: shuffle_v8f32_44444444: 852 ; AVX1: # BB#0: 853 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 854 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 855 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 856 ; AVX1-NEXT: retq 857 ; 858 ; AVX2-LABEL: shuffle_v8f32_44444444: 859 ; AVX2: # BB#0: 860 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 861 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 862 ; AVX2-NEXT: retq 863 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, 
<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 864 ret <8 x float> %shuffle 865 } 866 867 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) { 868 ; AVX1-LABEL: shuffle_v8f32_5555uuuu: 869 ; AVX1: # BB#0: 870 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 871 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 872 ; AVX1-NEXT: retq 873 ; 874 ; AVX2-LABEL: shuffle_v8f32_5555uuuu: 875 ; AVX2: # BB#0: 876 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 877 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 878 ; AVX2-NEXT: retq 879 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> 880 ret <8 x float> %shuffle 881 } 882 883 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { 884 ; AVX1-LABEL: shuffle_v8i32_00000000: 885 ; AVX1: # BB#0: 886 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 887 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 888 ; AVX1-NEXT: retq 889 ; 890 ; AVX2-LABEL: shuffle_v8i32_00000000: 891 ; AVX2: # BB#0: 892 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 893 ; AVX2-NEXT: retq 894 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 895 ret <8 x i32> %shuffle 896 } 897 898 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) { 899 ; AVX1-LABEL: shuffle_v8i32_00000010: 900 ; AVX1: # BB#0: 901 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 902 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 903 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 904 ; AVX1-NEXT: retq 905 ; 906 ; AVX2-LABEL: shuffle_v8i32_00000010: 907 ; AVX2: # BB#0: 908 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 909 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 910 ; AVX2-NEXT: retq 911 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 912 ret <8 x i32> %shuffle 913 } 914 915 define <8 
x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) { 916 ; AVX1-LABEL: shuffle_v8i32_00000200: 917 ; AVX1: # BB#0: 918 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 919 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 920 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 921 ; AVX1-NEXT: retq 922 ; 923 ; AVX2-LABEL: shuffle_v8i32_00000200: 924 ; AVX2: # BB#0: 925 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 926 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 927 ; AVX2-NEXT: retq 928 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 929 ret <8 x i32> %shuffle 930 } 931 932 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) { 933 ; AVX1-LABEL: shuffle_v8i32_00003000: 934 ; AVX1: # BB#0: 935 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 936 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 937 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 938 ; AVX1-NEXT: retq 939 ; 940 ; AVX2-LABEL: shuffle_v8i32_00003000: 941 ; AVX2: # BB#0: 942 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 943 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 944 ; AVX2-NEXT: retq 945 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 946 ret <8 x i32> %shuffle 947 } 948 949 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) { 950 ; AVX1-LABEL: shuffle_v8i32_00040000: 951 ; AVX1: # BB#0: 952 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 953 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 954 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 955 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 956 ; AVX1-NEXT: retq 957 ; 958 ; AVX2-LABEL: shuffle_v8i32_00040000: 959 ; AVX2: # BB#0: 960 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 961 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 962 ; AVX2-NEXT: retq 963 %shuffle = shufflevector <8 x i32> %a, 
<8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 964 ret <8 x i32> %shuffle 965 } 966 967 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) { 968 ; AVX1-LABEL: shuffle_v8i32_00500000: 969 ; AVX1: # BB#0: 970 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 971 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 972 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 973 ; AVX1-NEXT: retq 974 ; 975 ; AVX2-LABEL: shuffle_v8i32_00500000: 976 ; AVX2: # BB#0: 977 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 978 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 979 ; AVX2-NEXT: retq 980 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 981 ret <8 x i32> %shuffle 982 } 983 984 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) { 985 ; AVX1-LABEL: shuffle_v8i32_06000000: 986 ; AVX1: # BB#0: 987 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 988 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 989 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 990 ; AVX1-NEXT: retq 991 ; 992 ; AVX2-LABEL: shuffle_v8i32_06000000: 993 ; AVX2: # BB#0: 994 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 995 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 996 ; AVX2-NEXT: retq 997 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 998 ret <8 x i32> %shuffle 999 } 1000 1001 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { 1002 ; AVX1-LABEL: shuffle_v8i32_70000000: 1003 ; AVX1: # BB#0: 1004 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1005 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 1006 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 1007 ; AVX1-NEXT: retq 1008 ; 1009 ; AVX2-LABEL: shuffle_v8i32_70000000: 1010 ; AVX2: # BB#0: 1011 ; AVX2-NEXT: movl $7, %eax 1012 ; AVX2-NEXT: 
vmovd %eax, %xmm1 1013 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1014 ; AVX2-NEXT: retq 1015 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1016 ret <8 x i32> %shuffle 1017 } 1018 1019 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { 1020 ; AVX1-LABEL: shuffle_v8i32_01014545: 1021 ; AVX1: # BB#0: 1022 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 1023 ; AVX1-NEXT: retq 1024 ; 1025 ; AVX2-LABEL: shuffle_v8i32_01014545: 1026 ; AVX2: # BB#0: 1027 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1028 ; AVX2-NEXT: retq 1029 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 1030 ret <8 x i32> %shuffle 1031 } 1032 1033 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) { 1034 ; AVX1-LABEL: shuffle_v8i32_00112233: 1035 ; AVX1: # BB#0: 1036 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 1037 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 1038 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1039 ; AVX1-NEXT: retq 1040 ; 1041 ; AVX2-LABEL: shuffle_v8i32_00112233: 1042 ; AVX2: # BB#0: 1043 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 1044 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1045 ; AVX2-NEXT: retq 1046 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 1047 ret <8 x i32> %shuffle 1048 } 1049 1050 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) { 1051 ; AVX1-LABEL: shuffle_v8i32_00001111: 1052 ; AVX1: # BB#0: 1053 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1054 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1055 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1056 ; AVX1-NEXT: retq 1057 ; 1058 ; AVX2-LABEL: shuffle_v8i32_00001111: 1059 ; AVX2: # BB#0: 1060 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 1061 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 
1062 ; AVX2-NEXT: retq 1063 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 1064 ret <8 x i32> %shuffle 1065 } 1066 1067 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { 1068 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7: 1069 ; AVX1: # BB#0: 1070 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1071 ; AVX1-NEXT: retq 1072 ; 1073 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7: 1074 ; AVX2: # BB#0: 1075 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1076 ; AVX2-NEXT: retq 1077 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 1078 ret <8 x i32> %shuffle 1079 } 1080 1081 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { 1082 ; AVX1-LABEL: shuffle_v8i32_08080808: 1083 ; AVX1: # BB#0: 1084 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 1085 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 1086 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1087 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1088 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1089 ; AVX1-NEXT: retq 1090 ; 1091 ; AVX2-LABEL: shuffle_v8i32_08080808: 1092 ; AVX2: # BB#0: 1093 ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1 1094 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 1095 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1096 ; AVX2-NEXT: retq 1097 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 1098 ret <8 x i32> %shuffle 1099 } 1100 1101 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) { 1102 ; AVX1-LABEL: shuffle_v8i32_08084c4c: 1103 ; AVX1: # BB#0: 1104 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 1105 ; 
AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1106 ; AVX1-NEXT: retq 1107 ; 1108 ; AVX2-LABEL: shuffle_v8i32_08084c4c: 1109 ; AVX2: # BB#0: 1110 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4] 1111 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1112 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1113 ; AVX2-NEXT: retq 1114 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 1115 ret <8 x i32> %shuffle 1116 } 1117 1118 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) { 1119 ; AVX1-LABEL: shuffle_v8i32_8823cc67: 1120 ; AVX1: # BB#0: 1121 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 1122 ; AVX1-NEXT: retq 1123 ; 1124 ; AVX2-LABEL: shuffle_v8i32_8823cc67: 1125 ; AVX2: # BB#0: 1126 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7] 1127 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1128 ; AVX2-NEXT: retq 1129 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 1130 ret <8 x i32> %shuffle 1131 } 1132 1133 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) { 1134 ; AVX1-LABEL: shuffle_v8i32_9832dc76: 1135 ; AVX1: # BB#0: 1136 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 1137 ; AVX1-NEXT: retq 1138 ; 1139 ; AVX2-LABEL: shuffle_v8i32_9832dc76: 1140 ; AVX2: # BB#0: 1141 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1142 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1143 ; AVX2-NEXT: retq 1144 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 1145 ret <8 x i32> %shuffle 1146 } 1147 1148 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { 1149 ; AVX1-LABEL: 
shuffle_v8i32_9810dc54: 1150 ; AVX1: # BB#0: 1151 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 1152 ; AVX1-NEXT: retq 1153 ; 1154 ; AVX2-LABEL: shuffle_v8i32_9810dc54: 1155 ; AVX2: # BB#0: 1156 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4] 1157 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7] 1158 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1159 ; AVX2-NEXT: retq 1160 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 1161 ret <8 x i32> %shuffle 1162 } 1163 1164 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { 1165 ; AVX1-LABEL: shuffle_v8i32_08194c5d: 1166 ; AVX1: # BB#0: 1167 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1168 ; AVX1-NEXT: retq 1169 ; 1170 ; AVX2-LABEL: shuffle_v8i32_08194c5d: 1171 ; AVX2: # BB#0: 1172 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1173 ; AVX2-NEXT: retq 1174 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1175 ret <8 x i32> %shuffle 1176 } 1177 1178 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { 1179 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f: 1180 ; AVX1: # BB#0: 1181 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1182 ; AVX1-NEXT: retq 1183 ; 1184 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f: 1185 ; AVX2: # BB#0: 1186 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1187 ; AVX2-NEXT: retq 1188 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1189 ret <8 x i32> %shuffle 1190 } 1191 1192 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { 1193 ; 
AVX1-LABEL: shuffle_v8i32_08192a3b: 1194 ; AVX1: # BB#0: 1195 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1196 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1197 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1198 ; AVX1-NEXT: retq 1199 ; 1200 ; AVX2-LABEL: shuffle_v8i32_08192a3b: 1201 ; AVX2: # BB#0: 1202 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 1203 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1204 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1205 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1206 ; AVX2-NEXT: retq 1207 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1208 ret <8 x i32> %shuffle 1209 } 1210 1211 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { 1212 ; AVX1-LABEL: shuffle_v8i32_08991abb: 1213 ; AVX1: # BB#0: 1214 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 1215 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 1216 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1217 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 1218 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1219 ; AVX1-NEXT: retq 1220 ; 1221 ; AVX2-LABEL: shuffle_v8i32_08991abb: 1222 ; AVX2: # BB#0: 1223 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1224 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1225 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 1226 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1227 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1228 ; AVX2-NEXT: retq 1229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 1230 ret <8 x i32> %shuffle 1231 } 1232 1233 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { 1234 ; AVX1-LABEL: shuffle_v8i32_091b2d3f: 1235 ; AVX1: 
# BB#0: 1236 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 1237 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 1238 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1239 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1240 ; AVX1-NEXT: retq 1241 ; 1242 ; AVX2-LABEL: shuffle_v8i32_091b2d3f: 1243 ; AVX2: # BB#0: 1244 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1245 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1246 ; AVX2-NEXT: retq 1247 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1248 ret <8 x i32> %shuffle 1249 } 1250 1251 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { 1252 ; AVX1-LABEL: shuffle_v8i32_09ab1def: 1253 ; AVX1: # BB#0: 1254 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 1255 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1256 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1257 ; AVX1-NEXT: retq 1258 ; 1259 ; AVX2-LABEL: shuffle_v8i32_09ab1def: 1260 ; AVX2: # BB#0: 1261 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1262 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1263 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1264 ; AVX2-NEXT: retq 1265 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1266 ret <8 x i32> %shuffle 1267 } 1268 1269 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) { 1270 ; AVX1-LABEL: shuffle_v8i32_00014445: 1271 ; AVX1: # BB#0: 1272 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1273 ; AVX1-NEXT: retq 1274 ; 1275 ; AVX2-LABEL: shuffle_v8i32_00014445: 1276 ; AVX2: # BB#0: 1277 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1278 ; AVX2-NEXT: retq 1279 %shuffle = shufflevector 
<8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 1280 ret <8 x i32> %shuffle 1281 } 1282 1283 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) { 1284 ; AVX1-LABEL: shuffle_v8i32_00204464: 1285 ; AVX1: # BB#0: 1286 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1287 ; AVX1-NEXT: retq 1288 ; 1289 ; AVX2-LABEL: shuffle_v8i32_00204464: 1290 ; AVX2: # BB#0: 1291 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1292 ; AVX2-NEXT: retq 1293 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 1294 ret <8 x i32> %shuffle 1295 } 1296 1297 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) { 1298 ; AVX1-LABEL: shuffle_v8i32_03004744: 1299 ; AVX1: # BB#0: 1300 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1301 ; AVX1-NEXT: retq 1302 ; 1303 ; AVX2-LABEL: shuffle_v8i32_03004744: 1304 ; AVX2: # BB#0: 1305 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1306 ; AVX2-NEXT: retq 1307 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 1308 ret <8 x i32> %shuffle 1309 } 1310 1311 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) { 1312 ; AVX1-LABEL: shuffle_v8i32_10005444: 1313 ; AVX1: # BB#0: 1314 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1315 ; AVX1-NEXT: retq 1316 ; 1317 ; AVX2-LABEL: shuffle_v8i32_10005444: 1318 ; AVX2: # BB#0: 1319 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1320 ; AVX2-NEXT: retq 1321 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 1322 ret <8 x i32> %shuffle 1323 } 1324 1325 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) { 1326 ; AVX1-LABEL: shuffle_v8i32_22006644: 1327 ; AVX1: # BB#0: 1328 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 
1329 ; AVX1-NEXT: retq 1330 ; 1331 ; AVX2-LABEL: shuffle_v8i32_22006644: 1332 ; AVX2: # BB#0: 1333 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1334 ; AVX2-NEXT: retq 1335 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 1336 ret <8 x i32> %shuffle 1337 } 1338 1339 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) { 1340 ; AVX1-LABEL: shuffle_v8i32_33307774: 1341 ; AVX1: # BB#0: 1342 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1343 ; AVX1-NEXT: retq 1344 ; 1345 ; AVX2-LABEL: shuffle_v8i32_33307774: 1346 ; AVX2: # BB#0: 1347 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1348 ; AVX2-NEXT: retq 1349 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 1350 ret <8 x i32> %shuffle 1351 } 1352 1353 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { 1354 ; AVX1-LABEL: shuffle_v8i32_32107654: 1355 ; AVX1: # BB#0: 1356 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1357 ; AVX1-NEXT: retq 1358 ; 1359 ; AVX2-LABEL: shuffle_v8i32_32107654: 1360 ; AVX2: # BB#0: 1361 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1362 ; AVX2-NEXT: retq 1363 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1364 ret <8 x i32> %shuffle 1365 } 1366 1367 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { 1368 ; AVX1-LABEL: shuffle_v8i32_00234467: 1369 ; AVX1: # BB#0: 1370 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1371 ; AVX1-NEXT: retq 1372 ; 1373 ; AVX2-LABEL: shuffle_v8i32_00234467: 1374 ; AVX2: # BB#0: 1375 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1376 ; AVX2-NEXT: retq 1377 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 1378 ret <8 x i32> %shuffle 1379 } 
;
; v8i32 single-input shuffles: in-lane masks repeated in both 128-bit lanes
;

define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00224466:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00224466:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10325476:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10325476:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_11335577:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_11335577:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10235467:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10235467:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10225466:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10225466:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i32> %shuffle
}

;
; v8i32 single-input shuffles: in-lane masks that differ between the lanes
;

define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00015444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00015444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00204644:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00204644:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_03004474:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_03004474:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10004444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10004444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_22006446:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_22006446:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_33307474:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_33307474:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_32104567:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_32104567:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00236744:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00236744:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00226644:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00226644:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_10324567:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_10324567:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_11334567:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_11334567:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01235467:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_01235467:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01235466:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_01235466:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
  ret <8 x i32> %shuffle
}

;
; v8i32 single-input in-lane shuffles with undef mask elements
;

define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_002u6u44:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_002u6u44:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00uu66uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_00uu66uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_103245uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_103245uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_1133uu67:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_1133uu67:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_0uu354uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_0uu354uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
  ret <8 x i32> %shuffle
}

;
; v8i32 shuffles that move elements across 128-bit lanes or combine two inputs
;

define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_6caa87e5:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_6caa87e5:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_32103210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_32103210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_76547654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_76547654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_76543210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_76543210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_3210ba98:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_3210ba98:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_3210fedc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_3210fedc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_7654fedc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_7654fedc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_fedc7654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_fedc7654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_ba987654:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba987654:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %shuffle
}

; The low result half is %b[3..0] and the high result half is %a[3..0], i.e.
; the low 128 bits of each input, reversed, with %b first. The assertions below
; mirror shuffle_v8i32_3210ba98 with the operands swapped; refresh them with
; utils/update_llc_test_checks.py after changing the mask.
define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_ba983210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_ba983210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %shuffle
}

;
; v8i32 shuffles with an all-zero first operand
;

define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
  ret <8 x i32> %shuffle
}

; NOTE(review): mask element 4 is 12 (hex 'c') although the test name spells it
; 'b'. The name is left unchanged; the assertions below agree with the mask.
define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

;
; Shuffles whose defined mask elements all select the same source element
;

define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_uuuu1111:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_uuuu1111:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_44444444:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_44444444:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_5555uuuu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_5555uuuu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

define <8 x float> @splat_mem_v8f32_2(float* %p) {
; ALL-LABEL: splat_mem_v8f32_2:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load float, float* %p
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %3
}

define <8 x float> @splat_v8f32(<4 x float> %r) {
; AVX1-LABEL: splat_v8f32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_v8f32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:    retq
  %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %1
}

;
; Shuffle to logical bit shifts
;

define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

;
; Per-lane element rotations, two-input then single-input
;

define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_B012F456:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_B012F456:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_1238567C:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_1238567C:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_389A7CDE:
; AVX1:       # BB#0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_389A7CDE:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_30127456:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_30127456:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_12305674:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_12305674:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
  ret <8 x i32> %shuffle
}

;
; Concatenation and insertion patterns
;

define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_1:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %tmp76
}

define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_2:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %tmp76
}

define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_3:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %res
}

define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v8i32:
; ALL:       # BB#0:
; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    retq
  %a = load i32, i32* %ptr
  %v = insertelement <8 x i32> undef, i32 %a, i32 0
  %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %shuffle
}

define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: concat_v8i32_0123CDEF:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v8i32_0123CDEF:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
  %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuf
}

define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
; ALL-LABEL: concat_v8i32_4567CDEF_bc:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
  %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
  ret <8 x i32> %shuffle32
}

define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
; ALL-LABEL: concat_v8f32_4567CDEF_bc:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
  %a0 = bitcast <8 x float> %f0 to <4 x i64>
  %a1 = bitcast <8 x float> %f1 to <8 x i32>
  %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
  ret <8 x float> %shuffle32
}

define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
; ALL-LABEL: insert_dup_mem_v8i32:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
; ALL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %tmp2
}