1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST 7 8 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) { 9 ; AVX1-LABEL: shuffle_v8f32_00000000: 10 ; AVX1: # %bb.0: 11 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 12 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 13 ; AVX1-NEXT: retq 14 ; 15 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000000: 16 ; AVX2OR512VL: # %bb.0: 17 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 18 ; AVX2OR512VL-NEXT: retq 19 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20 ret <8 x float> %shuffle 21 } 22 23 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) { 24 ; AVX1-LABEL: shuffle_v8f32_00000010: 25 ; AVX1: # %bb.0: 26 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 27 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 28 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 29 ; AVX1-NEXT: retq 30 ; 31 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000010: 
32 ; AVX2OR512VL: # %bb.0: 33 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 34 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1] 35 ; AVX2OR512VL-NEXT: retq 36 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 37 ret <8 x float> %shuffle 38 } 39 40 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) { 41 ; AVX1-LABEL: shuffle_v8f32_00000200: 42 ; AVX1: # %bb.0: 43 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 44 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 45 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 46 ; AVX1-NEXT: retq 47 ; 48 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000200: 49 ; AVX2OR512VL: # %bb.0: 50 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2] 51 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0] 52 ; AVX2OR512VL-NEXT: retq 53 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 54 ret <8 x float> %shuffle 55 } 56 57 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) { 58 ; AVX1-LABEL: shuffle_v8f32_00003000: 59 ; AVX1: # %bb.0: 60 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 61 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 62 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 63 ; AVX1-NEXT: retq 64 ; 65 ; AVX2OR512VL-LABEL: shuffle_v8f32_00003000: 66 ; AVX2OR512VL: # %bb.0: 67 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0] 68 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0] 69 ; AVX2OR512VL-NEXT: retq 70 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 71 ret <8 x float> %shuffle 72 } 73 74 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) { 75 ; AVX1-LABEL: shuffle_v8f32_00040000: 76 ; AVX1: # %bb.0: 77 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3] 78 ; AVX1-NEXT: 
vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 79 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 80 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7] 81 ; AVX1-NEXT: retq 82 ; 83 ; AVX2OR512VL-LABEL: shuffle_v8f32_00040000: 84 ; AVX2OR512VL: # %bb.0: 85 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 86 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 87 ; AVX2OR512VL-NEXT: retq 88 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 89 ret <8 x float> %shuffle 90 } 91 92 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) { 93 ; AVX1-LABEL: shuffle_v8f32_00500000: 94 ; AVX1: # %bb.0: 95 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 96 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 97 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 98 ; AVX1-NEXT: retq 99 ; 100 ; AVX2OR512VL-LABEL: shuffle_v8f32_00500000: 101 ; AVX2OR512VL: # %bb.0: 102 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 103 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 104 ; AVX2OR512VL-NEXT: retq 105 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 106 ret <8 x float> %shuffle 107 } 108 109 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) { 110 ; AVX1-LABEL: shuffle_v8f32_06000000: 111 ; AVX1: # %bb.0: 112 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 113 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] 114 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 115 ; AVX1-NEXT: retq 116 ; 117 ; AVX2OR512VL-LABEL: shuffle_v8f32_06000000: 118 ; AVX2OR512VL: # %bb.0: 119 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 120 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 121 ; AVX2OR512VL-NEXT: retq 122 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, 
i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 123 ret <8 x float> %shuffle 124 } 125 126 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { 127 ; AVX1-LABEL: shuffle_v8f32_70000000: 128 ; AVX1: # %bb.0: 129 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 130 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] 131 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 132 ; AVX1-NEXT: retq 133 ; 134 ; AVX2OR512VL-LABEL: shuffle_v8f32_70000000: 135 ; AVX2OR512VL: # %bb.0: 136 ; AVX2OR512VL-NEXT: movl $7, %eax 137 ; AVX2OR512VL-NEXT: vmovd %eax, %xmm1 138 ; AVX2OR512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 139 ; AVX2OR512VL-NEXT: retq 140 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 141 ret <8 x float> %shuffle 142 } 143 144 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) { 145 ; ALL-LABEL: shuffle_v8f32_01014545: 146 ; ALL: # %bb.0: 147 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 148 ; ALL-NEXT: retq 149 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 150 ret <8 x float> %shuffle 151 } 152 153 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) { 154 ; AVX1-LABEL: shuffle_v8f32_00112233: 155 ; AVX1: # %bb.0: 156 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 157 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 158 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 159 ; AVX1-NEXT: retq 160 ; 161 ; AVX2OR512VL-LABEL: shuffle_v8f32_00112233: 162 ; AVX2OR512VL: # %bb.0: 163 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 164 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 165 ; AVX2OR512VL-NEXT: retq 166 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 167 ret <8 x float> %shuffle 168 } 169 170 define <8 x float> 
@shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) { 171 ; AVX1-LABEL: shuffle_v8f32_00001111: 172 ; AVX1: # %bb.0: 173 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 174 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 175 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 176 ; AVX1-NEXT: retq 177 ; 178 ; AVX2OR512VL-LABEL: shuffle_v8f32_00001111: 179 ; AVX2OR512VL: # %bb.0: 180 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] 181 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1] 182 ; AVX2OR512VL-NEXT: retq 183 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 184 ret <8 x float> %shuffle 185 } 186 187 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) { 188 ; ALL-LABEL: shuffle_v8f32_81a3c5e7: 189 ; ALL: # %bb.0: 190 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 191 ; ALL-NEXT: retq 192 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 193 ret <8 x float> %shuffle 194 } 195 196 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) { 197 ; AVX1-LABEL: shuffle_v8f32_08080808: 198 ; AVX1: # %bb.0: 199 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 200 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] 201 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 202 ; AVX1-NEXT: retq 203 ; 204 ; AVX2OR512VL-LABEL: shuffle_v8f32_08080808: 205 ; AVX2OR512VL: # %bb.0: 206 ; AVX2OR512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 207 ; AVX2OR512VL-NEXT: vbroadcastsd %xmm0, %ymm0 208 ; AVX2OR512VL-NEXT: retq 209 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 210 ret <8 x float> %shuffle 211 } 212 213 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) { 214 ; ALL-LABEL: 
shuffle_v8f32_08084c4c: 215 ; ALL: # %bb.0: 216 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 217 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 218 ; ALL-NEXT: retq 219 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 220 ret <8 x float> %shuffle 221 } 222 223 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) { 224 ; ALL-LABEL: shuffle_v8f32_8823cc67: 225 ; ALL: # %bb.0: 226 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 227 ; ALL-NEXT: retq 228 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 229 ret <8 x float> %shuffle 230 } 231 232 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) { 233 ; ALL-LABEL: shuffle_v8f32_9832dc76: 234 ; ALL: # %bb.0: 235 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 236 ; ALL-NEXT: retq 237 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 238 ret <8 x float> %shuffle 239 } 240 241 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) { 242 ; ALL-LABEL: shuffle_v8f32_9810dc54: 243 ; ALL: # %bb.0: 244 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 245 ; ALL-NEXT: retq 246 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 247 ret <8 x float> %shuffle 248 } 249 250 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) { 251 ; ALL-LABEL: shuffle_v8f32_08194c5d: 252 ; ALL: # %bb.0: 253 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 254 ; ALL-NEXT: retq 255 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, 
i32 5, i32 13> 256 ret <8 x float> %shuffle 257 } 258 259 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) { 260 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f: 261 ; ALL: # %bb.0: 262 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 263 ; ALL-NEXT: retq 264 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 265 ret <8 x float> %shuffle 266 } 267 268 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) { 269 ; AVX1OR2-LABEL: shuffle_v8f32_08192a3b: 270 ; AVX1OR2: # %bb.0: 271 ; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 272 ; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 273 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 274 ; AVX1OR2-NEXT: retq 275 ; 276 ; AVX512VL-LABEL: shuffle_v8f32_08192a3b: 277 ; AVX512VL: # %bb.0: 278 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11] 279 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 280 ; AVX512VL-NEXT: retq 281 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 282 ret <8 x float> %shuffle 283 } 284 285 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) { 286 ; AVX1-LABEL: shuffle_v8f32_08991abb: 287 ; AVX1: # %bb.0: 288 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 289 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 290 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 291 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 292 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 293 ; AVX1-NEXT: retq 294 ; 295 ; AVX2-LABEL: shuffle_v8f32_08991abb: 296 ; AVX2: # %bb.0: 297 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 298 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 299 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3] 300 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = 
ymm0[0,1,1,3] 301 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 302 ; AVX2-NEXT: retq 303 ; 304 ; AVX512VL-LABEL: shuffle_v8f32_08991abb: 305 ; AVX512VL: # %bb.0: 306 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 307 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3] 308 ; AVX512VL-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0 309 ; AVX512VL-NEXT: retq 310 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 311 ret <8 x float> %shuffle 312 } 313 314 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) { 315 ; AVX1-LABEL: shuffle_v8f32_091b2d3f: 316 ; AVX1: # %bb.0: 317 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 318 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 319 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 320 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 321 ; AVX1-NEXT: retq 322 ; 323 ; AVX2-LABEL: shuffle_v8f32_091b2d3f: 324 ; AVX2: # %bb.0: 325 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 326 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 327 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 328 ; AVX2-NEXT: retq 329 ; 330 ; AVX512VL-LABEL: shuffle_v8f32_091b2d3f: 331 ; AVX512VL: # %bb.0: 332 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15] 333 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 334 ; AVX512VL-NEXT: retq 335 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 336 ret <8 x float> %shuffle 337 } 338 339 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) { 340 ; AVX1-LABEL: shuffle_v8f32_09ab1def: 341 ; AVX1: # %bb.0: 342 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 343 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 344 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = 
ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 345 ; AVX1-NEXT: retq 346 ; 347 ; AVX2-LABEL: shuffle_v8f32_09ab1def: 348 ; AVX2: # %bb.0: 349 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3] 350 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] 351 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 352 ; AVX2-NEXT: retq 353 ; 354 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_09ab1def: 355 ; AVX512VL-SLOW: # %bb.0: 356 ; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3] 357 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] 358 ; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 359 ; AVX512VL-SLOW-NEXT: retq 360 ; 361 ; AVX512VL-FAST-LABEL: shuffle_v8f32_09ab1def: 362 ; AVX512VL-FAST: # %bb.0: 363 ; AVX512VL-FAST-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 364 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7] 365 ; AVX512VL-FAST-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0 366 ; AVX512VL-FAST-NEXT: retq 367 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 368 ret <8 x float> %shuffle 369 } 370 371 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) { 372 ; ALL-LABEL: shuffle_v8f32_00014445: 373 ; ALL: # %bb.0: 374 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 375 ; ALL-NEXT: retq 376 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 377 ret <8 x float> %shuffle 378 } 379 380 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) { 381 ; ALL-LABEL: shuffle_v8f32_00204464: 382 ; ALL: # %bb.0: 383 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 384 ; ALL-NEXT: retq 385 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 386 ret <8 x float> %shuffle 387 } 388 389 define <8 x float> 
@shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) { 390 ; ALL-LABEL: shuffle_v8f32_03004744: 391 ; ALL: # %bb.0: 392 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 393 ; ALL-NEXT: retq 394 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 395 ret <8 x float> %shuffle 396 } 397 398 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) { 399 ; ALL-LABEL: shuffle_v8f32_10005444: 400 ; ALL: # %bb.0: 401 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 402 ; ALL-NEXT: retq 403 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 404 ret <8 x float> %shuffle 405 } 406 407 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) { 408 ; ALL-LABEL: shuffle_v8f32_22006644: 409 ; ALL: # %bb.0: 410 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 411 ; ALL-NEXT: retq 412 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 413 ret <8 x float> %shuffle 414 } 415 416 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) { 417 ; ALL-LABEL: shuffle_v8f32_33307774: 418 ; ALL: # %bb.0: 419 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 420 ; ALL-NEXT: retq 421 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 422 ret <8 x float> %shuffle 423 } 424 425 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) { 426 ; ALL-LABEL: shuffle_v8f32_32107654: 427 ; ALL: # %bb.0: 428 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 429 ; ALL-NEXT: retq 430 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 431 ret <8 x float> %shuffle 432 } 433 434 define <8 x float> @shuffle_v8f32_00234467(<8 x 
float> %a, <8 x float> %b) { 435 ; ALL-LABEL: shuffle_v8f32_00234467: 436 ; ALL: # %bb.0: 437 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 438 ; ALL-NEXT: retq 439 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 440 ret <8 x float> %shuffle 441 } 442 443 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) { 444 ; ALL-LABEL: shuffle_v8f32_00224466: 445 ; ALL: # %bb.0: 446 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 447 ; ALL-NEXT: retq 448 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 449 ret <8 x float> %shuffle 450 } 451 452 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { 453 ; ALL-LABEL: shuffle_v8f32_10325476: 454 ; ALL: # %bb.0: 455 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 456 ; ALL-NEXT: retq 457 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 458 ret <8 x float> %shuffle 459 } 460 461 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) { 462 ; ALL-LABEL: shuffle_v8f32_11335577: 463 ; ALL: # %bb.0: 464 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 465 ; ALL-NEXT: retq 466 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 467 ret <8 x float> %shuffle 468 } 469 470 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) { 471 ; ALL-LABEL: shuffle_v8f32_10235467: 472 ; ALL: # %bb.0: 473 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 474 ; ALL-NEXT: retq 475 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 476 ret <8 x float> %shuffle 477 } 478 479 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) { 
480 ; ALL-LABEL: shuffle_v8f32_10225466: 481 ; ALL: # %bb.0: 482 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 483 ; ALL-NEXT: retq 484 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 485 ret <8 x float> %shuffle 486 } 487 488 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { 489 ; ALL-LABEL: shuffle_v8f32_00015444: 490 ; ALL: # %bb.0: 491 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 492 ; ALL-NEXT: retq 493 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 494 ret <8 x float> %shuffle 495 } 496 497 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { 498 ; ALL-LABEL: shuffle_v8f32_00204644: 499 ; ALL: # %bb.0: 500 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 501 ; ALL-NEXT: retq 502 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 503 ret <8 x float> %shuffle 504 } 505 506 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { 507 ; ALL-LABEL: shuffle_v8f32_03004474: 508 ; ALL: # %bb.0: 509 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 510 ; ALL-NEXT: retq 511 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 512 ret <8 x float> %shuffle 513 } 514 515 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { 516 ; ALL-LABEL: shuffle_v8f32_10004444: 517 ; ALL: # %bb.0: 518 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 519 ; ALL-NEXT: retq 520 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 521 ret <8 x float> %shuffle 522 } 523 524 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { 525 ; ALL-LABEL: 
shuffle_v8f32_22006446: 526 ; ALL: # %bb.0: 527 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 528 ; ALL-NEXT: retq 529 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 530 ret <8 x float> %shuffle 531 } 532 533 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { 534 ; ALL-LABEL: shuffle_v8f32_33307474: 535 ; ALL: # %bb.0: 536 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 537 ; ALL-NEXT: retq 538 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 539 ret <8 x float> %shuffle 540 } 541 542 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { 543 ; ALL-LABEL: shuffle_v8f32_32104567: 544 ; ALL: # %bb.0: 545 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 546 ; ALL-NEXT: retq 547 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 548 ret <8 x float> %shuffle 549 } 550 551 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { 552 ; ALL-LABEL: shuffle_v8f32_00236744: 553 ; ALL: # %bb.0: 554 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 555 ; ALL-NEXT: retq 556 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 557 ret <8 x float> %shuffle 558 } 559 560 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { 561 ; ALL-LABEL: shuffle_v8f32_00226644: 562 ; ALL: # %bb.0: 563 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 564 ; ALL-NEXT: retq 565 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 566 ret <8 x float> %shuffle 567 } 568 569 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { 570 ; ALL-LABEL: shuffle_v8f32_10324567: 571 ; 
ALL: # %bb.0: 572 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 573 ; ALL-NEXT: retq 574 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 575 ret <8 x float> %shuffle 576 } 577 578 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { 579 ; ALL-LABEL: shuffle_v8f32_11334567: 580 ; ALL: # %bb.0: 581 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 582 ; ALL-NEXT: retq 583 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 584 ret <8 x float> %shuffle 585 } 586 587 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { 588 ; ALL-LABEL: shuffle_v8f32_01235467: 589 ; ALL: # %bb.0: 590 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 591 ; ALL-NEXT: retq 592 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 593 ret <8 x float> %shuffle 594 } 595 596 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { 597 ; ALL-LABEL: shuffle_v8f32_01235466: 598 ; ALL: # %bb.0: 599 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 600 ; ALL-NEXT: retq 601 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 602 ret <8 x float> %shuffle 603 } 604 605 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { 606 ; ALL-LABEL: shuffle_v8f32_002u6u44: 607 ; ALL: # %bb.0: 608 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 609 ; ALL-NEXT: retq 610 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 611 ret <8 x float> %shuffle 612 } 613 614 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { 615 ; ALL-LABEL: shuffle_v8f32_00uu66uu: 616 ; ALL: # %bb.0: 617 ; 
ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 618 ; ALL-NEXT: retq 619 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 620 ret <8 x float> %shuffle 621 } 622 623 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { 624 ; ALL-LABEL: shuffle_v8f32_103245uu: 625 ; ALL: # %bb.0: 626 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 627 ; ALL-NEXT: retq 628 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 629 ret <8 x float> %shuffle 630 } 631 632 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { 633 ; ALL-LABEL: shuffle_v8f32_1133uu67: 634 ; ALL: # %bb.0: 635 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 636 ; ALL-NEXT: retq 637 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 638 ret <8 x float> %shuffle 639 } 640 641 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { 642 ; ALL-LABEL: shuffle_v8f32_0uu354uu: 643 ; ALL: # %bb.0: 644 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 645 ; ALL-NEXT: retq 646 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 647 ret <8 x float> %shuffle 648 } 649 650 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { 651 ; ALL-LABEL: shuffle_v8f32_uuu3uu66: 652 ; ALL: # %bb.0: 653 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 654 ; ALL-NEXT: retq 655 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 656 ret <8 x float> %shuffle 657 } 658 659 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) { 660 ; AVX1-LABEL: 
shuffle_v8f32_c348cda0: 661 ; AVX1: # %bb.0: 662 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 663 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4] 664 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 665 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4] 666 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7] 667 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 668 ; AVX1-NEXT: retq 669 ; 670 ; AVX2-SLOW-LABEL: shuffle_v8f32_c348cda0: 671 ; AVX2-SLOW: # %bb.0: 672 ; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u> 673 ; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm2, %ymm1 674 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,2,0,4,7,6,4] 675 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1] 676 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 677 ; AVX2-SLOW-NEXT: retq 678 ; 679 ; AVX2-FAST-LABEL: shuffle_v8f32_c348cda0: 680 ; AVX2-FAST: # %bb.0: 681 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,3,4,7,4,7,2,0] 682 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0 683 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u> 684 ; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1 685 ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 686 ; AVX2-FAST-NEXT: retq 687 ; 688 ; AVX512VL-LABEL: shuffle_v8f32_c348cda0: 689 ; AVX512VL: # %bb.0: 690 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [4,11,12,0,4,5,2,8] 691 ; AVX512VL-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2 692 ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0 693 ; AVX512VL-NEXT: retq 694 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0> 695 ret <8 x float> %shuffle 696 } 697 698 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) { 699 ; AVX1-LABEL: shuffle_v8f32_f511235a: 700 ; AVX1: # %bb.0: 701 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6] 702 ; 
AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] 703 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 704 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5] 705 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3] 706 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 707 ; AVX1-NEXT: retq 708 ; 709 ; AVX2-SLOW-LABEL: shuffle_v8f32_f511235a: 710 ; AVX2-SLOW: # %bb.0: 711 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,2,2,3,7,6,6,7] 712 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,0] 713 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,5,5,6,7] 714 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,1,2] 715 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 716 ; AVX2-SLOW-NEXT: retq 717 ; 718 ; AVX2-FAST-LABEL: shuffle_v8f32_f511235a: 719 ; AVX2-FAST: # %bb.0: 720 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,2,3,7,6,3,2] 721 ; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1 722 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [5,5,1,1,2,3,5,5] 723 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0 724 ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 725 ; AVX2-FAST-NEXT: retq 726 ; 727 ; AVX512VL-LABEL: shuffle_v8f32_f511235a: 728 ; AVX512VL: # %bb.0: 729 ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm2 = [15,5,1,1,2,3,5,10] 730 ; AVX512VL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 731 ; AVX512VL-NEXT: retq 732 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10> 733 ret <8 x float> %shuffle 734 } 735 736 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) { 737 ; AVX1-LABEL: shuffle_v8f32_32103210: 738 ; AVX1: # %bb.0: 739 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 740 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 741 ; AVX1-NEXT: retq 742 ; 743 ; AVX2OR512VL-LABEL: shuffle_v8f32_32103210: 744 ; AVX2OR512VL: # %bb.0: 745 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} 
xmm0 = xmm0[3,2,1,0] 746 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1] 747 ; AVX2OR512VL-NEXT: retq 748 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 749 ret <8 x float> %shuffle 750 } 751 752 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) { 753 ; AVX1-LABEL: shuffle_v8f32_76547654: 754 ; AVX1: # %bb.0: 755 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 756 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 757 ; AVX1-NEXT: retq 758 ; 759 ; AVX2-SLOW-LABEL: shuffle_v8f32_76547654: 760 ; AVX2-SLOW: # %bb.0: 761 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 762 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3] 763 ; AVX2-SLOW-NEXT: retq 764 ; 765 ; AVX2-FAST-LABEL: shuffle_v8f32_76547654: 766 ; AVX2-FAST: # %bb.0: 767 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 768 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 769 ; AVX2-FAST-NEXT: retq 770 ; 771 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_76547654: 772 ; AVX512VL-SLOW: # %bb.0: 773 ; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 774 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3] 775 ; AVX512VL-SLOW-NEXT: retq 776 ; 777 ; AVX512VL-FAST-LABEL: shuffle_v8f32_76547654: 778 ; AVX512VL-FAST: # %bb.0: 779 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 780 ; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 781 ; AVX512VL-FAST-NEXT: retq 782 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 783 ret <8 x float> %shuffle 784 } 785 786 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) { 787 ; AVX1-LABEL: shuffle_v8f32_76543210: 788 ; AVX1: # %bb.0: 789 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 790 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 791 ; AVX1-NEXT: retq 792 ; 793 ; 
AVX2-SLOW-LABEL: shuffle_v8f32_76543210: 794 ; AVX2-SLOW: # %bb.0: 795 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 796 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] 797 ; AVX2-SLOW-NEXT: retq 798 ; 799 ; AVX2-FAST-LABEL: shuffle_v8f32_76543210: 800 ; AVX2-FAST: # %bb.0: 801 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 802 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 803 ; AVX2-FAST-NEXT: retq 804 ; 805 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_76543210: 806 ; AVX512VL-SLOW: # %bb.0: 807 ; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 808 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] 809 ; AVX512VL-SLOW-NEXT: retq 810 ; 811 ; AVX512VL-FAST-LABEL: shuffle_v8f32_76543210: 812 ; AVX512VL-FAST: # %bb.0: 813 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 814 ; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 815 ; AVX512VL-FAST-NEXT: retq 816 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 817 ret <8 x float> %shuffle 818 } 819 820 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) { 821 ; ALL-LABEL: shuffle_v8f32_3210ba98: 822 ; ALL: # %bb.0: 823 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 824 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 825 ; ALL-NEXT: retq 826 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 827 ret <8 x float> %shuffle 828 } 829 830 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { 831 ; ALL-LABEL: shuffle_v8f32_3210fedc: 832 ; ALL: # %bb.0: 833 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 834 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 835 ; ALL-NEXT: retq 836 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 837 ret <8 x 
float> %shuffle 838 } 839 840 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { 841 ; AVX1OR2-LABEL: shuffle_v8f32_7654fedc: 842 ; AVX1OR2: # %bb.0: 843 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 844 ; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 845 ; AVX1OR2-NEXT: retq 846 ; 847 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_7654fedc: 848 ; AVX512VL-SLOW: # %bb.0: 849 ; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 850 ; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 851 ; AVX512VL-SLOW-NEXT: retq 852 ; 853 ; AVX512VL-FAST-LABEL: shuffle_v8f32_7654fedc: 854 ; AVX512VL-FAST: # %bb.0: 855 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12] 856 ; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 857 ; AVX512VL-FAST-NEXT: retq 858 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 859 ret <8 x float> %shuffle 860 } 861 862 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { 863 ; AVX1OR2-LABEL: shuffle_v8f32_fedc7654: 864 ; AVX1OR2: # %bb.0: 865 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 866 ; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 867 ; AVX1OR2-NEXT: retq 868 ; 869 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_fedc7654: 870 ; AVX512VL-SLOW: # %bb.0: 871 ; AVX512VL-SLOW-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 872 ; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 873 ; AVX512VL-SLOW-NEXT: retq 874 ; 875 ; AVX512VL-FAST-LABEL: shuffle_v8f32_fedc7654: 876 ; AVX512VL-FAST: # %bb.0: 877 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12] 878 ; AVX512VL-FAST-NEXT: vpermi2ps %ymm0, %ymm1, %ymm2 879 ; AVX512VL-FAST-NEXT: vmovaps %ymm2, %ymm0 880 ; AVX512VL-FAST-NEXT: retq 881 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, 
i32 6, i32 5, i32 4> 882 ret <8 x float> %shuffle 883 } 884 885 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) { 886 ; AVX1-LABEL: PR21138: 887 ; AVX1: # %bb.0: 888 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 889 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 890 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 891 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 892 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 893 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 894 ; AVX1-NEXT: retq 895 ; 896 ; AVX2-LABEL: PR21138: 897 ; AVX2: # %bb.0: 898 ; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 899 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 900 ; AVX2-NEXT: retq 901 ; 902 ; AVX512VL-SLOW-LABEL: PR21138: 903 ; AVX512VL-SLOW: # %bb.0: 904 ; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 905 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 906 ; AVX512VL-SLOW-NEXT: retq 907 ; 908 ; AVX512VL-FAST-LABEL: PR21138: 909 ; AVX512VL-FAST: # %bb.0: 910 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15] 911 ; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 912 ; AVX512VL-FAST-NEXT: retq 913 %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 914 ret <8 x float> %shuffle 915 } 916 917 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) { 918 ; ALL-LABEL: shuffle_v8f32_ba987654: 919 ; ALL: # %bb.0: 920 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 921 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 922 ; ALL-NEXT: retq 923 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 924 ret <8 x float> %shuffle 925 } 926 927 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) { 928 ; ALL-LABEL: 
shuffle_v8f32_ba983210: 929 ; ALL: # %bb.0: 930 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 931 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 932 ; ALL-NEXT: retq 933 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 934 ret <8 x float> %shuffle 935 } 936 937 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) { 938 ; ALL-LABEL: shuffle_v8f32_80u1c4u5: 939 ; ALL: # %bb.0: 940 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 941 ; ALL-NEXT: retq 942 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5> 943 ret <8 x float> %shuffle 944 } 945 946 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) { 947 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7: 948 ; ALL: # %bb.0: 949 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7] 950 ; ALL-NEXT: retq 951 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> 952 ret <8 x float> %shuffle 953 } 954 955 define <8 x float> @shuffle_v8f32_084c195d(<8 x float> %a, <8 x float> %b) { 956 ; AVX1-LABEL: shuffle_v8f32_084c195d: 957 ; AVX1: # %bb.0: 958 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 959 ; AVX1-NEXT: vpermilps {{.*#+}} ymm2 = ymm2[0,1,2,0,4,5,6,4] 960 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,1,4,4,6,5] 961 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3,4,5],ymm1[6,7] 962 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 963 ; AVX1-NEXT: vpermilps {{.*#+}} ymm2 = ymm2[1,1,0,3,5,5,4,7] 964 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7] 965 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5],ymm0[6,7] 966 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = 
ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 967 ; AVX1-NEXT: retq 968 ; 969 ; AVX2-LABEL: shuffle_v8f32_084c195d: 970 ; AVX2: # %bb.0: 971 ; AVX2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 972 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 973 ; AVX2-NEXT: retq 974 ; 975 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_084c195d: 976 ; AVX512VL-SLOW: # %bb.0: 977 ; AVX512VL-SLOW-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 978 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 979 ; AVX512VL-SLOW-NEXT: retq 980 ; 981 ; AVX512VL-FAST-LABEL: shuffle_v8f32_084c195d: 982 ; AVX512VL-FAST: # %bb.0: 983 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,8,4,12,1,9,5,13] 984 ; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0 985 ; AVX512VL-FAST-NEXT: retq 986 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 4, i32 12, i32 1, i32 9, i32 5, i32 13> 987 ret <8 x float> %shuffle 988 } 989 990 define <8 x float> @shuffle_v8f32_01452367d(<8 x float> %a) { 991 ; AVX1-LABEL: shuffle_v8f32_01452367d: 992 ; AVX1: # %bb.0: 993 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 994 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2] 995 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] 996 ; AVX1-NEXT: retq 997 ; 998 ; AVX2OR512VL-LABEL: shuffle_v8f32_01452367d: 999 ; AVX2OR512VL: # %bb.0: 1000 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 1001 ; AVX2OR512VL-NEXT: retq 1002 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 2, i32 3, i32 6, i32 7> 1003 ret <8 x float> %shuffle 1004 } 1005 1006 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) { 1007 ; ALL-LABEL: shuffle_v8f32_uuuu1111: 1008 ; ALL: # %bb.0: 1009 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1010 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1011 ; 
ALL-NEXT: retq 1012 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 1013 ret <8 x float> %shuffle 1014 } 1015 1016 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) { 1017 ; AVX1-LABEL: shuffle_v8f32_44444444: 1018 ; AVX1: # %bb.0: 1019 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 1020 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1021 ; AVX1-NEXT: retq 1022 ; 1023 ; AVX2OR512VL-LABEL: shuffle_v8f32_44444444: 1024 ; AVX2OR512VL: # %bb.0: 1025 ; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 1026 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 1027 ; AVX2OR512VL-NEXT: retq 1028 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 1029 ret <8 x float> %shuffle 1030 } 1031 1032 define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) { 1033 ; ALL-LABEL: shuffle_v8f32_1188uuuu: 1034 ; ALL: # %bb.0: 1035 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0] 1036 ; ALL-NEXT: retq 1037 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef> 1038 ret <8 x float> %shuffle 1039 } 1040 1041 define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) { 1042 ; ALL-LABEL: shuffle_v8f32_uuuu3210: 1043 ; ALL: # %bb.0: 1044 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1045 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1046 ; ALL-NEXT: retq 1047 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0> 1048 ret <8 x float> %shuffle 1049 } 1050 1051 define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) { 1052 ; ALL-LABEL: shuffle_v8f32_uuuu1188: 1053 ; ALL: # %bb.0: 1054 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0] 1055 ; ALL-NEXT: 
vinsertf128 $1, %xmm0, %ymm0, %ymm0 1056 ; ALL-NEXT: retq 1057 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8> 1058 ret <8 x float> %shuffle 1059 } 1060 1061 define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) { 1062 ; ALL-LABEL: shuffle_v8f32_1111uuuu: 1063 ; ALL: # %bb.0: 1064 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1065 ; ALL-NEXT: retq 1066 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef> 1067 ret <8 x float> %shuffle 1068 } 1069 1070 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) { 1071 ; ALL-LABEL: shuffle_v8f32_5555uuuu: 1072 ; ALL: # %bb.0: 1073 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0 1074 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1075 ; ALL-NEXT: retq 1076 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> 1077 ret <8 x float> %shuffle 1078 } 1079 1080 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { 1081 ; AVX1-LABEL: shuffle_v8i32_00000000: 1082 ; AVX1: # %bb.0: 1083 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 1084 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1085 ; AVX1-NEXT: retq 1086 ; 1087 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000000: 1088 ; AVX2OR512VL: # %bb.0: 1089 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 1090 ; AVX2OR512VL-NEXT: retq 1091 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1092 ret <8 x i32> %shuffle 1093 } 1094 1095 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) { 1096 ; AVX1-LABEL: shuffle_v8i32_00000010: 1097 ; AVX1: # %bb.0: 1098 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1099 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 1100 ; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm1, %ymm0 1101 ; AVX1-NEXT: retq 1102 ; 1103 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000010: 1104 ; AVX2OR512VL: # %bb.0: 1105 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 1106 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1] 1107 ; AVX2OR512VL-NEXT: retq 1108 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 1109 ret <8 x i32> %shuffle 1110 } 1111 1112 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) { 1113 ; AVX1-LABEL: shuffle_v8i32_00000200: 1114 ; AVX1: # %bb.0: 1115 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1116 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 1117 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1118 ; AVX1-NEXT: retq 1119 ; 1120 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000200: 1121 ; AVX2OR512VL: # %bb.0: 1122 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2] 1123 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0] 1124 ; AVX2OR512VL-NEXT: retq 1125 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 1126 ret <8 x i32> %shuffle 1127 } 1128 1129 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) { 1130 ; AVX1-LABEL: shuffle_v8i32_00003000: 1131 ; AVX1: # %bb.0: 1132 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1133 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 1134 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1135 ; AVX1-NEXT: retq 1136 ; 1137 ; AVX2OR512VL-LABEL: shuffle_v8i32_00003000: 1138 ; AVX2OR512VL: # %bb.0: 1139 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0] 1140 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0] 1141 ; AVX2OR512VL-NEXT: retq 1142 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 1143 ret <8 x i32> %shuffle 1144 } 1145 1146 define <8 x i32> 
@shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) { 1147 ; AVX1-LABEL: shuffle_v8i32_00040000: 1148 ; AVX1: # %bb.0: 1149 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3] 1150 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 1151 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 1152 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7] 1153 ; AVX1-NEXT: retq 1154 ; 1155 ; AVX2OR512VL-LABEL: shuffle_v8i32_00040000: 1156 ; AVX2OR512VL: # %bb.0: 1157 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 1158 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1159 ; AVX2OR512VL-NEXT: retq 1160 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 1161 ret <8 x i32> %shuffle 1162 } 1163 1164 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) { 1165 ; AVX1-LABEL: shuffle_v8i32_00500000: 1166 ; AVX1: # %bb.0: 1167 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1168 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 1169 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 1170 ; AVX1-NEXT: retq 1171 ; 1172 ; AVX2OR512VL-LABEL: shuffle_v8i32_00500000: 1173 ; AVX2OR512VL: # %bb.0: 1174 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 1175 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1176 ; AVX2OR512VL-NEXT: retq 1177 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 1178 ret <8 x i32> %shuffle 1179 } 1180 1181 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) { 1182 ; AVX1-LABEL: shuffle_v8i32_06000000: 1183 ; AVX1: # %bb.0: 1184 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1185 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] 1186 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 1187 ; AVX1-NEXT: retq 1188 ; 1189 ; AVX2OR512VL-LABEL: shuffle_v8i32_06000000: 1190 ; AVX2OR512VL: # 
%bb.0: 1191 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 1192 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1193 ; AVX2OR512VL-NEXT: retq 1194 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1195 ret <8 x i32> %shuffle 1196 } 1197 1198 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { 1199 ; AVX1-LABEL: shuffle_v8i32_70000000: 1200 ; AVX1: # %bb.0: 1201 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 1202 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] 1203 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 1204 ; AVX1-NEXT: retq 1205 ; 1206 ; AVX2OR512VL-LABEL: shuffle_v8i32_70000000: 1207 ; AVX2OR512VL: # %bb.0: 1208 ; AVX2OR512VL-NEXT: movl $7, %eax 1209 ; AVX2OR512VL-NEXT: vmovd %eax, %xmm1 1210 ; AVX2OR512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 1211 ; AVX2OR512VL-NEXT: retq 1212 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1213 ret <8 x i32> %shuffle 1214 } 1215 1216 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { 1217 ; AVX1-LABEL: shuffle_v8i32_01014545: 1218 ; AVX1: # %bb.0: 1219 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 1220 ; AVX1-NEXT: retq 1221 ; 1222 ; AVX2OR512VL-LABEL: shuffle_v8i32_01014545: 1223 ; AVX2OR512VL: # %bb.0: 1224 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1225 ; AVX2OR512VL-NEXT: retq 1226 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 1227 ret <8 x i32> %shuffle 1228 } 1229 1230 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) { 1231 ; AVX1-LABEL: shuffle_v8i32_00112233: 1232 ; AVX1: # %bb.0: 1233 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 1234 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 1235 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1236 ; 
AVX1-NEXT: retq 1237 ; 1238 ; AVX2OR512VL-LABEL: shuffle_v8i32_00112233: 1239 ; AVX2OR512VL: # %bb.0: 1240 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 1241 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1242 ; AVX2OR512VL-NEXT: retq 1243 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 1244 ret <8 x i32> %shuffle 1245 } 1246 1247 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) { 1248 ; AVX1-LABEL: shuffle_v8i32_00001111: 1249 ; AVX1: # %bb.0: 1250 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1251 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1252 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1253 ; AVX1-NEXT: retq 1254 ; 1255 ; AVX2OR512VL-LABEL: shuffle_v8i32_00001111: 1256 ; AVX2OR512VL: # %bb.0: 1257 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] 1258 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1] 1259 ; AVX2OR512VL-NEXT: retq 1260 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 1261 ret <8 x i32> %shuffle 1262 } 1263 1264 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { 1265 ; ALL-LABEL: shuffle_v8i32_81a3c5e7: 1266 ; ALL: # %bb.0: 1267 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1268 ; ALL-NEXT: retq 1269 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 1270 ret <8 x i32> %shuffle 1271 } 1272 1273 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { 1274 ; AVX1-LABEL: shuffle_v8i32_08080808: 1275 ; AVX1: # %bb.0: 1276 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 1277 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] 1278 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1279 ; AVX1-NEXT: retq 1280 ; 1281 ; AVX2OR512VL-LABEL: shuffle_v8i32_08080808: 1282 ; 
AVX2OR512VL: # %bb.0: 1283 ; AVX2OR512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1284 ; AVX2OR512VL-NEXT: vbroadcastsd %xmm0, %ymm0 1285 ; AVX2OR512VL-NEXT: retq 1286 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 1287 ret <8 x i32> %shuffle 1288 } 1289 1290 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) { 1291 ; AVX1-LABEL: shuffle_v8i32_08084c4c: 1292 ; AVX1: # %bb.0: 1293 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 1294 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1295 ; AVX1-NEXT: retq 1296 ; 1297 ; AVX2OR512VL-LABEL: shuffle_v8i32_08084c4c: 1298 ; AVX2OR512VL: # %bb.0: 1299 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4] 1300 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1301 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1302 ; AVX2OR512VL-NEXT: retq 1303 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 1304 ret <8 x i32> %shuffle 1305 } 1306 1307 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) { 1308 ; ALL-LABEL: shuffle_v8i32_8823cc67: 1309 ; ALL: # %bb.0: 1310 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 1311 ; ALL-NEXT: retq 1312 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 1313 ret <8 x i32> %shuffle 1314 } 1315 1316 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) { 1317 ; ALL-LABEL: shuffle_v8i32_9832dc76: 1318 ; ALL: # %bb.0: 1319 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 1320 ; ALL-NEXT: retq 1321 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 1322 ret <8 x 
i32> %shuffle 1323 } 1324 1325 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { 1326 ; ALL-LABEL: shuffle_v8i32_9810dc54: 1327 ; ALL: # %bb.0: 1328 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 1329 ; ALL-NEXT: retq 1330 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 1331 ret <8 x i32> %shuffle 1332 } 1333 1334 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { 1335 ; ALL-LABEL: shuffle_v8i32_08194c5d: 1336 ; ALL: # %bb.0: 1337 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1338 ; ALL-NEXT: retq 1339 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1340 ret <8 x i32> %shuffle 1341 } 1342 1343 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { 1344 ; ALL-LABEL: shuffle_v8i32_2a3b6e7f: 1345 ; ALL: # %bb.0: 1346 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1347 ; ALL-NEXT: retq 1348 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1349 ret <8 x i32> %shuffle 1350 } 1351 1352 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { 1353 ; AVX1OR2-LABEL: shuffle_v8i32_08192a3b: 1354 ; AVX1OR2: # %bb.0: 1355 ; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1356 ; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1357 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1358 ; AVX1OR2-NEXT: retq 1359 ; 1360 ; AVX512VL-LABEL: shuffle_v8i32_08192a3b: 1361 ; AVX512VL: # %bb.0: 1362 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1363 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11] 1364 ; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0 1365 ; 
AVX512VL-NEXT: retq 1366 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1367 ret <8 x i32> %shuffle 1368 } 1369 1370 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { 1371 ; AVX1-LABEL: shuffle_v8i32_08991abb: 1372 ; AVX1: # %bb.0: 1373 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 1374 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 1375 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1376 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 1377 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1378 ; AVX1-NEXT: retq 1379 ; 1380 ; AVX2-LABEL: shuffle_v8i32_08991abb: 1381 ; AVX2: # %bb.0: 1382 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 1383 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1384 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1385 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3] 1386 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1387 ; AVX2-NEXT: retq 1388 ; 1389 ; AVX512VL-LABEL: shuffle_v8i32_08991abb: 1390 ; AVX512VL: # %bb.0: 1391 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero 1392 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3] 1393 ; AVX512VL-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 1394 ; AVX512VL-NEXT: retq 1395 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 1396 ret <8 x i32> %shuffle 1397 } 1398 1399 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { 1400 ; AVX1-LABEL: shuffle_v8i32_091b2d3f: 1401 ; AVX1: # %bb.0: 1402 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 1403 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 1404 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1405 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1406 ; AVX1-NEXT: retq 1407 ; 1408 ; AVX2OR512VL-LABEL: 
shuffle_v8i32_091b2d3f: 1409 ; AVX2OR512VL: # %bb.0: 1410 ; AVX2OR512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1411 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1412 ; AVX2OR512VL-NEXT: retq 1413 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1414 ret <8 x i32> %shuffle 1415 } 1416 1417 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { 1418 ; AVX1-LABEL: shuffle_v8i32_09ab1def: 1419 ; AVX1: # %bb.0: 1420 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,3,3] 1421 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1422 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1423 ; AVX1-NEXT: retq 1424 ; 1425 ; AVX2-LABEL: shuffle_v8i32_09ab1def: 1426 ; AVX2: # %bb.0: 1427 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1428 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3] 1429 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1430 ; AVX2-NEXT: retq 1431 ; 1432 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_09ab1def: 1433 ; AVX512VL-SLOW: # %bb.0: 1434 ; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1435 ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3] 1436 ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1437 ; AVX512VL-SLOW-NEXT: retq 1438 ; 1439 ; AVX512VL-FAST-LABEL: shuffle_v8i32_09ab1def: 1440 ; AVX512VL-FAST: # %bb.0: 1441 ; AVX512VL-FAST-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero 1442 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7] 1443 ; AVX512VL-FAST-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 1444 ; AVX512VL-FAST-NEXT: retq 1445 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1446 ret <8 x i32> %shuffle 1447 } 1448 1449 define <8 
x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) { 1450 ; ALL-LABEL: shuffle_v8i32_00014445: 1451 ; ALL: # %bb.0: 1452 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1453 ; ALL-NEXT: retq 1454 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 1455 ret <8 x i32> %shuffle 1456 } 1457 1458 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) { 1459 ; ALL-LABEL: shuffle_v8i32_00204464: 1460 ; ALL: # %bb.0: 1461 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1462 ; ALL-NEXT: retq 1463 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 1464 ret <8 x i32> %shuffle 1465 } 1466 1467 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) { 1468 ; ALL-LABEL: shuffle_v8i32_03004744: 1469 ; ALL: # %bb.0: 1470 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1471 ; ALL-NEXT: retq 1472 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 1473 ret <8 x i32> %shuffle 1474 } 1475 1476 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) { 1477 ; ALL-LABEL: shuffle_v8i32_10005444: 1478 ; ALL: # %bb.0: 1479 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1480 ; ALL-NEXT: retq 1481 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 1482 ret <8 x i32> %shuffle 1483 } 1484 1485 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) { 1486 ; ALL-LABEL: shuffle_v8i32_22006644: 1487 ; ALL: # %bb.0: 1488 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1489 ; ALL-NEXT: retq 1490 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 1491 ret <8 x i32> %shuffle 1492 } 1493 1494 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, 
<8 x i32> %b) { 1495 ; ALL-LABEL: shuffle_v8i32_33307774: 1496 ; ALL: # %bb.0: 1497 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1498 ; ALL-NEXT: retq 1499 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 1500 ret <8 x i32> %shuffle 1501 } 1502 1503 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { 1504 ; ALL-LABEL: shuffle_v8i32_32107654: 1505 ; ALL: # %bb.0: 1506 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1507 ; ALL-NEXT: retq 1508 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1509 ret <8 x i32> %shuffle 1510 } 1511 1512 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { 1513 ; ALL-LABEL: shuffle_v8i32_00234467: 1514 ; ALL: # %bb.0: 1515 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1516 ; ALL-NEXT: retq 1517 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 1518 ret <8 x i32> %shuffle 1519 } 1520 1521 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { 1522 ; AVX1-LABEL: shuffle_v8i32_00224466: 1523 ; AVX1: # %bb.0: 1524 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1525 ; AVX1-NEXT: retq 1526 ; 1527 ; AVX2OR512VL-LABEL: shuffle_v8i32_00224466: 1528 ; AVX2OR512VL: # %bb.0: 1529 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1530 ; AVX2OR512VL-NEXT: retq 1531 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1532 ret <8 x i32> %shuffle 1533 } 1534 1535 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { 1536 ; ALL-LABEL: shuffle_v8i32_10325476: 1537 ; ALL: # %bb.0: 1538 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1539 ; ALL-NEXT: retq 1540 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, 
i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 1541 ret <8 x i32> %shuffle 1542 } 1543 1544 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { 1545 ; AVX1-LABEL: shuffle_v8i32_11335577: 1546 ; AVX1: # %bb.0: 1547 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1548 ; AVX1-NEXT: retq 1549 ; 1550 ; AVX2OR512VL-LABEL: shuffle_v8i32_11335577: 1551 ; AVX2OR512VL: # %bb.0: 1552 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1553 ; AVX2OR512VL-NEXT: retq 1554 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1555 ret <8 x i32> %shuffle 1556 } 1557 1558 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) { 1559 ; ALL-LABEL: shuffle_v8i32_10235467: 1560 ; ALL: # %bb.0: 1561 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1562 ; ALL-NEXT: retq 1563 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1564 ret <8 x i32> %shuffle 1565 } 1566 1567 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { 1568 ; ALL-LABEL: shuffle_v8i32_10225466: 1569 ; ALL: # %bb.0: 1570 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1571 ; ALL-NEXT: retq 1572 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 1573 ret <8 x i32> %shuffle 1574 } 1575 1576 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { 1577 ; AVX1-LABEL: shuffle_v8i32_00015444: 1578 ; AVX1: # %bb.0: 1579 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 1580 ; AVX1-NEXT: retq 1581 ; 1582 ; AVX2OR512VL-LABEL: shuffle_v8i32_00015444: 1583 ; AVX2OR512VL: # %bb.0: 1584 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] 1585 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1586 ; AVX2OR512VL-NEXT: retq 1587 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, 
i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 1588 ret <8 x i32> %shuffle 1589 } 1590 1591 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { 1592 ; AVX1-LABEL: shuffle_v8i32_00204644: 1593 ; AVX1: # %bb.0: 1594 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 1595 ; AVX1-NEXT: retq 1596 ; 1597 ; AVX2OR512VL-LABEL: shuffle_v8i32_00204644: 1598 ; AVX2OR512VL: # %bb.0: 1599 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] 1600 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1601 ; AVX2OR512VL-NEXT: retq 1602 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 1603 ret <8 x i32> %shuffle 1604 } 1605 1606 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { 1607 ; AVX1-LABEL: shuffle_v8i32_03004474: 1608 ; AVX1: # %bb.0: 1609 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 1610 ; AVX1-NEXT: retq 1611 ; 1612 ; AVX2OR512VL-LABEL: shuffle_v8i32_03004474: 1613 ; AVX2OR512VL: # %bb.0: 1614 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] 1615 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1616 ; AVX2OR512VL-NEXT: retq 1617 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 1618 ret <8 x i32> %shuffle 1619 } 1620 1621 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { 1622 ; AVX1-LABEL: shuffle_v8i32_10004444: 1623 ; AVX1: # %bb.0: 1624 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 1625 ; AVX1-NEXT: retq 1626 ; 1627 ; AVX2OR512VL-LABEL: shuffle_v8i32_10004444: 1628 ; AVX2OR512VL: # %bb.0: 1629 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4] 1630 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1631 ; AVX2OR512VL-NEXT: retq 1632 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 1633 ret <8 x i32> %shuffle 1634 } 1635 1636 define <8 x 
i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { 1637 ; AVX1-LABEL: shuffle_v8i32_22006446: 1638 ; AVX1: # %bb.0: 1639 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 1640 ; AVX1-NEXT: retq 1641 ; 1642 ; AVX2OR512VL-LABEL: shuffle_v8i32_22006446: 1643 ; AVX2OR512VL: # %bb.0: 1644 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6] 1645 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1646 ; AVX2OR512VL-NEXT: retq 1647 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 1648 ret <8 x i32> %shuffle 1649 } 1650 1651 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { 1652 ; AVX1-LABEL: shuffle_v8i32_33307474: 1653 ; AVX1: # %bb.0: 1654 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 1655 ; AVX1-NEXT: retq 1656 ; 1657 ; AVX2OR512VL-LABEL: shuffle_v8i32_33307474: 1658 ; AVX2OR512VL: # %bb.0: 1659 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4] 1660 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1661 ; AVX2OR512VL-NEXT: retq 1662 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 1663 ret <8 x i32> %shuffle 1664 } 1665 1666 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { 1667 ; AVX1-LABEL: shuffle_v8i32_32104567: 1668 ; AVX1: # %bb.0: 1669 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 1670 ; AVX1-NEXT: retq 1671 ; 1672 ; AVX2OR512VL-LABEL: shuffle_v8i32_32104567: 1673 ; AVX2OR512VL: # %bb.0: 1674 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7] 1675 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1676 ; AVX2OR512VL-NEXT: retq 1677 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 1678 ret <8 x i32> %shuffle 1679 } 1680 1681 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { 1682 ; AVX1-LABEL: shuffle_v8i32_00236744: 1683 
; AVX1: # %bb.0: 1684 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 1685 ; AVX1-NEXT: retq 1686 ; 1687 ; AVX2OR512VL-LABEL: shuffle_v8i32_00236744: 1688 ; AVX2OR512VL: # %bb.0: 1689 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4] 1690 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1691 ; AVX2OR512VL-NEXT: retq 1692 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 1693 ret <8 x i32> %shuffle 1694 } 1695 1696 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { 1697 ; AVX1-LABEL: shuffle_v8i32_00226644: 1698 ; AVX1: # %bb.0: 1699 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 1700 ; AVX1-NEXT: retq 1701 ; 1702 ; AVX2OR512VL-LABEL: shuffle_v8i32_00226644: 1703 ; AVX2OR512VL: # %bb.0: 1704 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4] 1705 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1706 ; AVX2OR512VL-NEXT: retq 1707 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 1708 ret <8 x i32> %shuffle 1709 } 1710 1711 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { 1712 ; AVX1-LABEL: shuffle_v8i32_10324567: 1713 ; AVX1: # %bb.0: 1714 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 1715 ; AVX1-NEXT: retq 1716 ; 1717 ; AVX2OR512VL-LABEL: shuffle_v8i32_10324567: 1718 ; AVX2OR512VL: # %bb.0: 1719 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7] 1720 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1721 ; AVX2OR512VL-NEXT: retq 1722 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1723 ret <8 x i32> %shuffle 1724 } 1725 1726 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { 1727 ; AVX1-LABEL: shuffle_v8i32_11334567: 1728 ; AVX1: # %bb.0: 1729 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 1730 ; AVX1-NEXT: retq 
1731 ; 1732 ; AVX2OR512VL-LABEL: shuffle_v8i32_11334567: 1733 ; AVX2OR512VL: # %bb.0: 1734 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7] 1735 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1736 ; AVX2OR512VL-NEXT: retq 1737 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 1738 ret <8 x i32> %shuffle 1739 } 1740 1741 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { 1742 ; AVX1-LABEL: shuffle_v8i32_01235467: 1743 ; AVX1: # %bb.0: 1744 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 1745 ; AVX1-NEXT: retq 1746 ; 1747 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235467: 1748 ; AVX2OR512VL: # %bb.0: 1749 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7] 1750 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1751 ; AVX2OR512VL-NEXT: retq 1752 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1753 ret <8 x i32> %shuffle 1754 } 1755 1756 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { 1757 ; AVX1-LABEL: shuffle_v8i32_01235466: 1758 ; AVX1: # %bb.0: 1759 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 1760 ; AVX1-NEXT: retq 1761 ; 1762 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235466: 1763 ; AVX2OR512VL: # %bb.0: 1764 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6] 1765 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1766 ; AVX2OR512VL-NEXT: retq 1767 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 1768 ret <8 x i32> %shuffle 1769 } 1770 1771 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { 1772 ; AVX1-LABEL: shuffle_v8i32_002u6u44: 1773 ; AVX1: # %bb.0: 1774 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 1775 ; AVX1-NEXT: retq 1776 ; 1777 ; AVX2OR512VL-LABEL: shuffle_v8i32_002u6u44: 1778 ; AVX2OR512VL: # %bb.0: 1779 ; 
AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4> 1780 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1781 ; AVX2OR512VL-NEXT: retq 1782 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 1783 ret <8 x i32> %shuffle 1784 } 1785 1786 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { 1787 ; AVX1-LABEL: shuffle_v8i32_00uu66uu: 1788 ; AVX1: # %bb.0: 1789 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 1790 ; AVX1-NEXT: retq 1791 ; 1792 ; AVX2OR512VL-LABEL: shuffle_v8i32_00uu66uu: 1793 ; AVX2OR512VL: # %bb.0: 1794 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u> 1795 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1796 ; AVX2OR512VL-NEXT: retq 1797 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 1798 ret <8 x i32> %shuffle 1799 } 1800 1801 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { 1802 ; AVX1-LABEL: shuffle_v8i32_103245uu: 1803 ; AVX1: # %bb.0: 1804 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 1805 ; AVX1-NEXT: retq 1806 ; 1807 ; AVX2OR512VL-LABEL: shuffle_v8i32_103245uu: 1808 ; AVX2OR512VL: # %bb.0: 1809 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u> 1810 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1811 ; AVX2OR512VL-NEXT: retq 1812 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 1813 ret <8 x i32> %shuffle 1814 } 1815 1816 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { 1817 ; AVX1-LABEL: shuffle_v8i32_1133uu67: 1818 ; AVX1: # %bb.0: 1819 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 1820 ; AVX1-NEXT: retq 1821 ; 1822 ; AVX2OR512VL-LABEL: shuffle_v8i32_1133uu67: 1823 ; AVX2OR512VL: # %bb.0: 1824 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7> 1825 ; 
AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1826 ; AVX2OR512VL-NEXT: retq 1827 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 1828 ret <8 x i32> %shuffle 1829 } 1830 1831 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { 1832 ; AVX1-LABEL: shuffle_v8i32_0uu354uu: 1833 ; AVX1: # %bb.0: 1834 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 1835 ; AVX1-NEXT: retq 1836 ; 1837 ; AVX2OR512VL-LABEL: shuffle_v8i32_0uu354uu: 1838 ; AVX2OR512VL: # %bb.0: 1839 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u> 1840 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1841 ; AVX2OR512VL-NEXT: retq 1842 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 1843 ret <8 x i32> %shuffle 1844 } 1845 1846 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { 1847 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66: 1848 ; AVX1: # %bb.0: 1849 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 1850 ; AVX1-NEXT: retq 1851 ; 1852 ; AVX2OR512VL-LABEL: shuffle_v8i32_uuu3uu66: 1853 ; AVX2OR512VL: # %bb.0: 1854 ; AVX2OR512VL-NEXT: vmovaps {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6> 1855 ; AVX2OR512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 1856 ; AVX2OR512VL-NEXT: retq 1857 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 1858 ret <8 x i32> %shuffle 1859 } 1860 1861 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) { 1862 ; AVX1-LABEL: shuffle_v8i32_6caa87e5: 1863 ; AVX1: # %bb.0: 1864 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1865 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1866 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 1867 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6] 1868 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = 
ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1869 ; AVX1-NEXT: retq 1870 ; 1871 ; AVX2-SLOW-LABEL: shuffle_v8i32_6caa87e5: 1872 ; AVX2-SLOW: # %bb.0: 1873 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2] 1874 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6] 1875 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,0,3] 1876 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1877 ; AVX2-SLOW-NEXT: retq 1878 ; 1879 ; AVX2-FAST-LABEL: shuffle_v8i32_6caa87e5: 1880 ; AVX2-FAST: # %bb.0: 1881 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [4,4,2,2,0,0,6,6] 1882 ; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1 1883 ; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2] 1884 ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1885 ; AVX2-FAST-NEXT: retq 1886 ; 1887 ; AVX512VL-LABEL: shuffle_v8i32_6caa87e5: 1888 ; AVX512VL: # %bb.0: 1889 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13] 1890 ; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2 1891 ; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 1892 ; AVX512VL-NEXT: retq 1893 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5> 1894 ret <8 x i32> %shuffle 1895 } 1896 1897 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) { 1898 ; AVX1-LABEL: shuffle_v8i32_32103210: 1899 ; AVX1: # %bb.0: 1900 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1901 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1902 ; AVX1-NEXT: retq 1903 ; 1904 ; AVX2OR512VL-LABEL: shuffle_v8i32_32103210: 1905 ; AVX2OR512VL: # %bb.0: 1906 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1907 ; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1] 1908 ; AVX2OR512VL-NEXT: retq 1909 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 1910 ret <8 x i32> %shuffle 1911 } 1912 1913 define <8 x i32> 
@shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) { 1914 ; AVX1-LABEL: shuffle_v8i32_76547654: 1915 ; AVX1: # %bb.0: 1916 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1917 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1918 ; AVX1-NEXT: retq 1919 ; 1920 ; AVX2-SLOW-LABEL: shuffle_v8i32_76547654: 1921 ; AVX2-SLOW: # %bb.0: 1922 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1923 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3] 1924 ; AVX2-SLOW-NEXT: retq 1925 ; 1926 ; AVX2-FAST-LABEL: shuffle_v8i32_76547654: 1927 ; AVX2-FAST: # %bb.0: 1928 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 1929 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 1930 ; AVX2-FAST-NEXT: retq 1931 ; 1932 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_76547654: 1933 ; AVX512VL-SLOW: # %bb.0: 1934 ; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1935 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3] 1936 ; AVX512VL-SLOW-NEXT: retq 1937 ; 1938 ; AVX512VL-FAST-LABEL: shuffle_v8i32_76547654: 1939 ; AVX512VL-FAST: # %bb.0: 1940 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 1941 ; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 1942 ; AVX512VL-FAST-NEXT: retq 1943 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 1944 ret <8 x i32> %shuffle 1945 } 1946 1947 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) { 1948 ; AVX1-LABEL: shuffle_v8i32_76543210: 1949 ; AVX1: # %bb.0: 1950 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1951 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 1952 ; AVX1-NEXT: retq 1953 ; 1954 ; AVX2-SLOW-LABEL: shuffle_v8i32_76543210: 1955 ; AVX2-SLOW: # %bb.0: 1956 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1957 ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] 1958 ; AVX2-SLOW-NEXT: retq 1959 ; 1960 ; AVX2-FAST-LABEL: shuffle_v8i32_76543210: 1961 
; AVX2-FAST: # %bb.0: 1962 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 1963 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 1964 ; AVX2-FAST-NEXT: retq 1965 ; 1966 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_76543210: 1967 ; AVX512VL-SLOW: # %bb.0: 1968 ; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1969 ; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] 1970 ; AVX512VL-SLOW-NEXT: retq 1971 ; 1972 ; AVX512VL-FAST-LABEL: shuffle_v8i32_76543210: 1973 ; AVX512VL-FAST: # %bb.0: 1974 ; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 1975 ; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0 1976 ; AVX512VL-FAST-NEXT: retq 1977 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1978 ret <8 x i32> %shuffle 1979 } 1980 1981 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) { 1982 ; ALL-LABEL: shuffle_v8i32_3210ba98: 1983 ; ALL: # %bb.0: 1984 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1985 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1986 ; ALL-NEXT: retq 1987 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 1988 ret <8 x i32> %shuffle 1989 } 1990 1991 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) { 1992 ; ALL-LABEL: shuffle_v8i32_3210fedc: 1993 ; ALL: # %bb.0: 1994 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1995 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1996 ; ALL-NEXT: retq 1997 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 1998 ret <8 x i32> %shuffle 1999 } 2000 2001 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { 2002 ; AVX1OR2-LABEL: shuffle_v8i32_7654fedc: 2003 ; AVX1OR2: # %bb.0: 2004 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2005 ; AVX1OR2-NEXT: vpermilps 
{{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2006 ; AVX1OR2-NEXT: retq 2007 ; 2008 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_7654fedc: 2009 ; AVX512VL-SLOW: # %bb.0: 2010 ; AVX512VL-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 2011 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2012 ; AVX512VL-SLOW-NEXT: retq 2013 ; 2014 ; AVX512VL-FAST-LABEL: shuffle_v8i32_7654fedc: 2015 ; AVX512VL-FAST: # %bb.0: 2016 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12] 2017 ; AVX512VL-FAST-NEXT: vpermt2d %ymm1, %ymm2, %ymm0 2018 ; AVX512VL-FAST-NEXT: retq 2019 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 2020 ret <8 x i32> %shuffle 2021 } 2022 2023 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) { 2024 ; AVX1OR2-LABEL: shuffle_v8i32_fedc7654: 2025 ; AVX1OR2: # %bb.0: 2026 ; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 2027 ; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2028 ; AVX1OR2-NEXT: retq 2029 ; 2030 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_fedc7654: 2031 ; AVX512VL-SLOW: # %bb.0: 2032 ; AVX512VL-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 2033 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2034 ; AVX512VL-SLOW-NEXT: retq 2035 ; 2036 ; AVX512VL-FAST-LABEL: shuffle_v8i32_fedc7654: 2037 ; AVX512VL-FAST: # %bb.0: 2038 ; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12] 2039 ; AVX512VL-FAST-NEXT: vpermi2d %ymm0, %ymm1, %ymm2 2040 ; AVX512VL-FAST-NEXT: vmovdqa %ymm2, %ymm0 2041 ; AVX512VL-FAST-NEXT: retq 2042 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 2043 ret <8 x i32> %shuffle 2044 } 2045 2046 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) { 2047 ; ALL-LABEL: shuffle_v8i32_ba987654: 2048 ; ALL: # %bb.0: 2049 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = 
ymm1[0,1,2,3],ymm0[4,5,6,7] 2050 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2051 ; ALL-NEXT: retq 2052 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 2053 ret <8 x i32> %shuffle 2054 } 2055 ; Result is the reversed low 128-bit lane of %b followed by the reversed low 128-bit lane of %a (mask b,a,9,8,3,2,1,0). NOTE(review): assertions mirror shuffle_v8i32_3210ba98 with operands swapped; rerun utils/update_llc_test_checks.py to confirm. 2056 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) { 2057 ; ALL-LABEL: shuffle_v8i32_ba983210: 2058 ; ALL: # %bb.0: 2059 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2060 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 2061 ; ALL-NEXT: retq 2062 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 2063 ret <8 x i32> %shuffle 2064 } 2065 2066 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) { 2067 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc: 2068 ; AVX1: # %bb.0: 2069 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 2070 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4] 2071 ; AVX1-NEXT: retq 2072 ; 2073 ; AVX2OR512VL-LABEL: shuffle_v8i32_zuu8zuuc: 2074 ; AVX2OR512VL: # %bb.0: 2075 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19] 2076 ; AVX2OR512VL-NEXT: retq 2077 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12> 2078 ret <8 x i32> %shuffle 2079 } 2080 2081 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) { 2082 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz: 2083 ; AVX1: # %bb.0: 2084 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 2085 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4] 2086 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 2087 ; AVX1-NEXT: retq 2088 ; 2089 ; AVX2OR512VL-LABEL: shuffle_v8i32_9ubzdefz: 2090 ; AVX2OR512VL: # %bb.0: 2091 ; 
AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero 2092 ; AVX2OR512VL-NEXT: retq 2093 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0> 2094 ret <8 x i32> %shuffle 2095 } 2096 2097 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { 2098 ; ALL-LABEL: shuffle_v8i32_80u1b4uu: 2099 ; ALL: # %bb.0: 2100 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 2101 ; ALL-NEXT: retq 2102 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef> 2103 ret <8 x i32> %shuffle 2104 } 2105 2106 define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) { 2107 ; ALL-LABEL: shuffle_v8i32_uuuu1111: 2108 ; ALL: # %bb.0: 2109 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 2110 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2111 ; ALL-NEXT: retq 2112 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1> 2113 ret <8 x i32> %shuffle 2114 } 2115 2116 define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) { 2117 ; ALL-LABEL: shuffle_v8i32_2222uuuu: 2118 ; ALL: # %bb.0: 2119 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,2,2] 2120 ; ALL-NEXT: retq 2121 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef> 2122 ret <8 x i32> %shuffle 2123 } 2124 2125 define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) { 2126 ; ALL-LABEL: shuffle_v8i32_2A3Buuuu: 2127 ; ALL: # %bb.0: 2128 ; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2129 ; ALL-NEXT: retq 2130 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, 
i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 2131 ret <8 x i32> %shuffle 2132 } 2133 2134 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { 2135 ; AVX1-LABEL: shuffle_v8i32_44444444: 2136 ; AVX1: # %bb.0: 2137 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 2138 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 2139 ; AVX1-NEXT: retq 2140 ; 2141 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444: 2142 ; AVX2OR512VL: # %bb.0: 2143 ; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 2144 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 2145 ; AVX2OR512VL-NEXT: retq 2146 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 2147 ret <8 x i32> %shuffle 2148 } 2149 2150 define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) { 2151 ; AVX1-LABEL: shuffle_v8i32_44444444_bc: 2152 ; AVX1: # %bb.0: 2153 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 2154 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 2155 ; AVX1-NEXT: retq 2156 ; 2157 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444_bc: 2158 ; AVX2OR512VL: # %bb.0: 2159 ; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 2160 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 2161 ; AVX2OR512VL-NEXT: retq 2162 %tmp0 = bitcast <8 x float> %a to <8 x i32> 2163 %tmp1 = bitcast <8 x float> %b to <8 x i32> 2164 %shuffle = shufflevector <8 x i32> %tmp0, <8 x i32> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 2165 ret <8 x i32> %shuffle 2166 } 2167 2168 define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) { 2169 ; ALL-LABEL: shuffle_v8i32_5555uuuu: 2170 ; ALL: # %bb.0: 2171 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0 2172 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 2173 ; ALL-NEXT: retq 2174 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> 2175 ret <8 x i32> %shuffle 2176 
}

; PR32453
; Only result lane 6 is defined (element 7 of %a); every other lane is undef,
; so a single in-lane shuffle is expected in each configuration.
define <8 x i32> @shuffle_v8i32_uuuuuu7u(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: shuffle_v8i32_uuuuuu7u:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_uuuuuu7u:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,3,3,4,5,7,7]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
  ret <8 x i32> %shuffle
}

; Splat of a float loaded from %p - the scalar is inserted into a 4-element
; vector and widened to 8 lanes with an all-zero mask. Every configuration is
; expected to fold the load into one vbroadcastss.
define <8 x float> @splat_mem_v8f32_2(float* %p) {
; ALL-LABEL: splat_mem_v8f32_2:
; ALL:       # %bb.0:
; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load float, float* %p
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %3
}

; Splat of element 0 of a 4-element register operand into all 8 result lanes.
; The AVX1 checks expect vpermilps + vinsertf128, the other configurations a
; register-to-register vbroadcastss.
define <8 x float> @splat_v8f32(<4 x float> %r) {
; AVX1-LABEL: splat_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: splat_v8f32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %1
}

;
; Shuffle to logical bit shifts
;

; Shuffle of %a against a zero vector - each 64-bit pair holds zero (or undef)
; in its low i32 and an even-indexed element of %a (or undef) in its high i32.
; Outside AVX1 the checks expect a single vpsllq by 32.
define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
  ret <8 x i32> %shuffle
}

; Mirror of the test above - odd-indexed elements of %a (or undef) land in the
; low i32 of each 64-bit pair with zero (or undef) above them. Outside AVX1 the
; checks expect a single vpsrlq by 32.
define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}

; Two-input rotation within each 128-bit lane - the last element of the lane
; of %b followed by the first three elements of the lane of %a. Expected as
; one vpalignr outside AVX1 and as two vshufps on AVX1.
define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_B012F456:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_B012F456:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

; Two-input rotation within each 128-bit lane - the last three elements of the
; lane of %a followed by the first element of the lane of %b.
define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_1238567C:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_1238567C:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
  ret <8 x i32> %shuffle
}

; Same per-lane rotation as shuffle_v8i32_1238567C with the roles of %a and %b
; swapped, so the expected vpalignr takes its operands in the opposite order.
define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_9AB0DEF4:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
  ret <8 x i32> %shuffle
}

; Same per-lane rotation as shuffle_v8i32_B012F456 with the roles of %a and %b
; swapped.
define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_389A7CDE:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_389A7CDE:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2OR512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
  ret <8 x i32> %shuffle
}

; Single-input rotation within each 128-bit lane (the mask never selects from
; %b). Every configuration is expected to use one vpermilps.
define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_30127456:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

; Single-input per-lane rotation in the opposite direction to the test above,
; again expected as one vpermilps in every configuration.
define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_12305674:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
  ret <8 x i32> %shuffle
}

; Concatenates two loaded <2 x float> values into result lanes 0-3 (lanes 4-7
; are undef). Each load is first widened to 8 lanes and the widened vectors are
; then shuffled together. Expected as a vmovsd load plus a vmovhpd load.
define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_1:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %tmp76
}

; Same concatenation as concat_v2f32_1, written as one widening shuffle of the
; two <2 x float> loads. The expected instructions are identical.
define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_2:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %tmp76
}

; Same concatenation again, this time built as a <4 x float> concat followed by
; a widening shuffle to 8 lanes. The expected instructions are identical.
define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_3:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    retq
entry:
  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %res
}

; Loaded i32 in lane 0 with lanes 1-7 taken from a zero vector. Expected as a
; single vmovss load whose upper elements are shown as zero.
define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v8i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    retq
  %a = load i32, i32* %ptr
  %v = insertelement <8 x i32> undef, i32 %a, i32 0
  %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %shuffle
}

; Extracts the low half of %a and the high half of %b as 4-element vectors and
; concatenates them. Expected to collapse into one vblendps.
define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: concat_v8i32_0123CDEF:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT:    retq
  %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %shuf
}

; Extracts the high halves of both inputs, bitcasts them to <2 x i64>,
; concatenates and bitcasts back to <8 x i32>. Expected as one 128-bit lane
; permute - vperm2f128 for the AVX1/AVX2 runs, vperm2i128 for AVX512VL.
define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
; AVX1OR2-LABEL: concat_v8i32_4567CDEF_bc:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT:    retq
  %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
  %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
  ret <8 x i32> %shuffle32
}

; Float variant of the test above. The two inputs are bitcast to different
; integer vector types before their high halves are extracted (the %bc0hi
; bitcast is a same-type cast). Every configuration expects vperm2f128.
define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
; ALL-LABEL: concat_v8f32_4567CDEF_bc:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
  %a0 = bitcast <8 x float> %f0 to <4 x i64>
  %a1 = bitcast <8 x float> %f1 to <8 x i32>
  %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
  %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
  %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
  ret <8 x float> %shuffle32
}

; Loaded i32 inserted into lane 0 of a zero vector and then splatted to all 8
; lanes. Expected to fold into one vbroadcastss from memory.
define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
; ALL-LABEL: insert_dup_mem_v8i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
; ALL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %tmp2
}

; Rotation by one element across the whole 256-bit vector using two inputs -
; elements 1-7 of %a followed by element 0 of %b. The AVX512VL checks expect a
; single valignd, AVX2 a blend plus vpermps, and AVX1 a blend, a lane swap and
; two vshufps.
define <8 x i32> @shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_12345678:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_12345678:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8i32_12345678:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7],ymm1[0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i32> %shuffle
}

; Single-input version of the rotation above - elements 1-7 of %a followed by
; element 0 of %a.
define <8 x i32> @shuffle_v8i32_12345670(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_12345670:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_12345670:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0]
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v8i32_12345670:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
  ret <8 x i32> %shuffle
}

; Adds the even-indexed elements of the concatenation of %a and %b to the
; odd-indexed elements (float). The fast-variable-shuffle AVX512VL run expects
; two vpermi2ps with constant index vectors, while AVX2 and the slow AVX512VL
; run expect vshufps + vpermpd pairs.
define <8 x float> @add_v8f32_02468ACE_13579BDF(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: add_v8f32_02468ACE_13579BDF:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2]
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:    vaddps %ymm0, %ymm3, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v8f32_02468ACE_13579BDF:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: add_v8f32_02468ACE_13579BDF:
; AVX512VL-SLOW:       # %bb.0: # %entry
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: add_v8f32_02468ACE_13579BDF:
; AVX512VL-FAST:       # %bb.0: # %entry
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
; AVX512VL-FAST-NEXT:    vpermi2ps %ymm1, %ymm0, %ymm2
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
; AVX512VL-FAST-NEXT:    vpermi2ps %ymm1, %ymm0, %ymm3
; AVX512VL-FAST-NEXT:    vaddps %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %add = fadd <8 x float> %shuffle, %shuffle1
  ret <8 x float> %add
}

; Same even/odd float add as the test above, with the elements of %b placed in
; the low half of each shuffle and the elements of %a in the high half.
define <8 x float> @add_v8f32_8ACE0246_9BDF1357(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: add_v8f32_8ACE0246_9BDF1357:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2]
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vaddps %ymm0, %ymm3, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v8f32_8ACE0246_9BDF1357:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: add_v8f32_8ACE0246_9BDF1357:
; AVX512VL-SLOW:       # %bb.0: # %entry
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: add_v8f32_8ACE0246_9BDF1357:
; AVX512VL-FAST:       # %bb.0: # %entry
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
; AVX512VL-FAST-NEXT:    vpermi2ps %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
; AVX512VL-FAST-NEXT:    vpermi2ps %ymm0, %ymm1, %ymm3
; AVX512VL-FAST-NEXT:    vaddps %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>
  %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>
  %add = fadd <8 x float> %shuffle, %shuffle1
  ret <8 x float> %add
}

; Integer version of add_v8f32_02468ACE_13579BDF. The AVX1 checks expect the
; 256-bit integer add to be split into two 128-bit vpaddd, while the other
; configurations use a single 256-bit vpaddd.
define <8 x i32> @add_v8i32_02468ACE_13579BDF(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: add_v8i32_02468ACE_13579BDF:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2]
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v8i32_02468ACE_13579BDF:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: add_v8i32_02468ACE_13579BDF:
; AVX512VL-SLOW:       # %bb.0: # %entry
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: add_v8i32_02468ACE_13579BDF:
; AVX512VL-FAST:       # %bb.0: # %entry
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
; AVX512VL-FAST-NEXT:    vpermi2d %ymm1, %ymm0, %ymm2
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
; AVX512VL-FAST-NEXT:    vpermi2d %ymm1, %ymm0, %ymm3
; AVX512VL-FAST-NEXT:    vpaddd %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %add = add <8 x i32> %shuffle, %shuffle1
  ret <8 x i32> %add
}

; Integer version of add_v8f32_8ACE0246_9BDF1357 - elements of %b fill the low
; half of each shuffle and elements of %a the high half before the add.
define <8 x i32> @add_v8i32_8ACE0246_9BDF1357(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: add_v8i32_8ACE0246_9BDF1357:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2]
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: add_v8i32_8ACE0246_9BDF1357:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: add_v8i32_8ACE0246_9BDF1357:
; AVX512VL-SLOW:       # %bb.0: # %entry
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: add_v8i32_8ACE0246_9BDF1357:
; AVX512VL-FAST:       # %bb.0: # %entry
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
; AVX512VL-FAST-NEXT:    vpermi2d %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
; AVX512VL-FAST-NEXT:    vpermi2d %ymm0, %ymm1, %ymm3
; AVX512VL-FAST-NEXT:    vpaddd %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>
  %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>
  %add = add <8 x i32> %shuffle, %shuffle1
  ret <8 x i32> %add
}