; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

; Lowering tests for 512-bit shuffles of sixteen 32-bit elements
; (<16 x float> and <16 x i32>). Each function is compiled twice, once with
; avx512f+avx512dq and once with avx512bw+avx512dq. The shared prefix is used
; when both configurations are expected to produce the same code; the
; per-configuration prefixes are used where the expected output differs.
; In function names, two-digit fields are hexadecimal or decimal mask indices,
; "zz" marks an element taken from a zero vector and "u"/"uu" an undef element.

target triple = "x86_64-unknown-unknown"

; Splat of element 0 of %a; expected to be a single broadcast.
define <16 x float> @shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; ALL:       # %bb.0:
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x float> %shuffle
}

; Splat of element 8 of %a; the containing 128-bit lane is extracted before the broadcast.
define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x float> %shuffle
}

; Same element-8 splat, but the operands arrive as <16 x i32> and are bitcast to float first.
define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %tmp0 = bitcast <16 x i32> %a to <16 x float>
  %tmp1 = bitcast <16 x i32> %b to <16 x float>
  %shuffle = shufflevector <16 x float> %tmp0, <16 x float> %tmp1, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x float> %shuffle
}

; Interleave of the low two elements of each 128-bit lane of %a and %b (unpack-low pattern).
define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x float> %shuffle
}

; Unpack-low pattern where the second operand is a zero vector (every odd result element is zero).
define <16 x float> @shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32><i32 0, i32 16, i32 1, i32 16, i32 4, i32 16, i32 5, i32 16, i32 8, i32 16, i32 9, i32 16, i32 12, i32 16, i32 13, i32 16>
  ret <16 x float> %shuffle
}

; Unpack-low pattern with %b elements first; expected to match with the operands commuted.
define <16 x float> @shuffle_v16f32_vunpcklps_swap(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_vunpcklps_swap:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[12],zmm0[12],zmm1[13],zmm0[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 20, i32 4, i32 21, i32 5, i32 24, i32 8, i32 25, i32 9, i32 28, i32 12, i32 29, i32 13>
  ret <16 x float> %shuffle
}

; PR34382
; Single-input permute that stays within each 128-bit lane but uses a different pattern per lane.
define <16 x float> @shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12(<16 x float> %a0) {
; ALL-LABEL: shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,1,3,0,6,4,5,7,8,8,9,9,15,14,14,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 0, i32 6, i32 4, i32 5, i32 7, i32 8, i32 8, i32 9, i32 9, i32 15, i32 14, i32 14, i32 12>
  ret <16 x float> %shuffle
}

; Integer version of the unpack-low interleave; the expected code uses the float-domain unpack.
define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x i32> %shuffle
}

; Unpack-low pattern where the first operand is a zero vector; the zero elements are
; addressed through varying indices (15, 13, 11, ...) of the zero operand.
define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32><i32 15, i32 16, i32 13, i32 17, i32 11, i32 20, i32 9, i32 21, i32 7, i32 24, i32 5, i32 25, i32 3, i32 28, i32 1, i32 29>
  ret <16 x i32> %shuffle
}

; Interleave of the high two elements of each 128-bit lane of %a and %b (unpack-high pattern).
define <16 x float> @shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  ret <16 x float> %shuffle
}

; Unpack-high pattern where the first operand is a zero vector.
define <16 x float> @shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> zeroinitializer, <16 x float> %b, <16 x i32><i32 0, i32 18, i32 0, i32 19, i32 4, i32 22, i32 4, i32 23, i32 6, i32 26, i32 6, i32 27, i32 8, i32 30, i32 8, i32 31>
  ret <16 x float> %shuffle
}

; Duplicate each even-indexed element of %a into the following odd slot.
define <16 x float> @shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x float> %shuffle
}

; Duplicate each odd-indexed element of %a into the preceding even slot.
define <16 x float> @shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x float> %shuffle
}

; The mask moves adjacent element pairs together, so the expected code permutes 64-bit units.
define <16 x float> @shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_01_00_01_06_07_06_07_08_09_10_11_12_13_12_13:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilpd {{.*#+}} zmm0 = zmm0[0,0,3,3,4,5,6,6]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13>
  ret <16 x float> %shuffle
}

; Single-input in-lane permute with the same [0,0,2,0] pattern repeated in every 128-bit lane.
define <16 x float> @shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_00_02_00_04_04_06_04_08_08_10_08_12_12_14_12:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4,8,8,10,8,12,12,14,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4, i32 8, i32 8, i32 10, i32 8, i32 12, i32 12, i32 14, i32 12>
  ret <16 x float> %shuffle
}

; Mostly-undef mask with one defined element per 128-bit lane; expected to become a
; single in-lane permute.
define <16 x float> @shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu_12:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[3,0,3,0,7,4,7,4,11,8,11,8,15,12,15,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12>
  ret <16 x float> %shuffle
}

; Integer splat of element 0 of %a.
define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; ALL:       # %bb.0:
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i32> %shuffle
}

; Integer splat of element 4 of %a; the second 128-bit lane is extracted before the broadcast.
define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <16 x i32> %shuffle
}

; Integer version of the unpack-high interleave.
define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i32> %shuffle
}

; Unpack-high pattern where the second operand is a zero vector, addressed through
; varying indices (30, 28, 26, ...).
define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32><i32 2, i32 30, i32 3, i32 28, i32 6, i32 26, i32 7, i32 24, i32 10, i32 22, i32 11, i32 20, i32 14, i32 18, i32 15, i32 16>
  ret <16 x i32> %shuffle
}

; Per-lane rotation that pulls one element from %b into each 128-bit lane. The two
; configurations are expected to differ: a two-source permute without avx512bw, a byte
; align with it.
define <16 x i32> @shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,2,3,16,5,6,7,20,9,10,11,24,13,14,15,28]
; AVX512F-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v16i32_01_02_03_16_05_06_07_20_09_10_11_24_13_14_15_28:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1,2,3],zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zmm1[16,17,18,19],zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zmm1[32,33,34,35],zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zmm1[48,49,50,51]
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 1, i32 2, i32 3, i32 16, i32 5, i32 6, i32 7, i32 20, i32 9, i32 10, i32 11, i32 24, i32 13, i32 14, i32 15, i32 28>
  ret <16 x i32> %shuffle
}

; Single-input cross-lane shuffle with undef elements; expected to use a variable permute
; whose index constant keeps the undef slots.
define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x float> %c
}

; Integer version of the single-input cross-lane shuffle above; the same expected code.
define <16 x i32> @shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x i32> %c
}

; Two-input cross-lane integer shuffle; expected to use a two-source variable permute.
define <16 x i32> @shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %c
}

; Float version of the two-input cross-lane shuffle above.
define <16 x float> @shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %c
}

; Same two-input shuffle with the second operand loaded from memory; the load is expected
; to fold into the permute's memory operand.
define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float>* %b) {
; ALL-LABEL: shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2ps (%rdi), %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = load <16 x float>, <16 x float>* %b
  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %d
}

; Integer version of the load-folding test above.
define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32>* %b) {
; ALL-LABEL: shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
; ALL-NEXT:    vpermt2d (%rdi), %zmm1, %zmm0
; ALL-NEXT:    retq
  %c = load <16 x i32>, <16 x i32>* %b
  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %d
}

; Only the low four result elements are defined, so a 128-bit blend is expected.
define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i32> %c
}

;FIXME: can do better with vpcompress
; Narrowing shuffle: the odd elements of a <16 x i32> gathered into an <8 x i32>.
define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) {
; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; ALL-NEXT:    retq
  %res = shufflevector <16 x i32> %v, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <8 x i32> %res
}

;FIXME: can do better with vpcompress
; Narrowing shuffle: elements 0, 1, 2 and 12 of a <16 x i32> gathered into a <4 x i32>.
define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
; ALL-LABEL: test_v16i32_0_1_2_12:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm1
; ALL-NEXT:    vbroadcastss %xmm1, %xmm1
; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 12>
  ret <4 x i32> %res
}

; Extracting the upper eight elements of a loaded <16 x float>; expected to shrink to a
; 256-bit load at byte offset 32.
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 32(%rsi), %ymm0
; ALL-NEXT:    retq
  %ptr_a = bitcast float* %a to <16 x float>*
  %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
  %v2 = shufflevector <16 x float> %v_a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %v2
}

;FIXME: can do better with vcompressp
; Narrowing shuffle: eight selected elements of a <16 x float>, the last one from the upper half.
define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) {
; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT:    vmovsldup {{.*#+}} xmm1 = xmm1[0,0,2,2]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,u]
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
; ALL-NEXT:    retq
  %res = shufflevector <16 x float> %v, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 7, i32 10>
  ret <8 x float> %res
}

;FIXME: can do better with vcompressp
; Narrowing shuffle: elements 0, 1, 3 and 6 of a <16 x float> gathered into a <4 x float>.
define <4 x float> @test_v16f32_0_1_3_6 (<16 x float> %v) {
; ALL-LABEL: test_v16f32_0_1_3_6:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,1,3,3]
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; ALL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %res = shufflevector <16 x float> %v, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 6>
  ret <4 x float> %res
}

; In-lane [1,0,0,0] permute repeated in every 128-bit lane.
; NOTE(review): the name says v16i16 but the operands are <16 x i32>; name kept as-is.
define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
  ret <16 x i32> %c
}

; In-lane [2,3,0,1] permute (swap of the two 64-bit halves) repeated in every 128-bit lane.
; NOTE(review): the name says v16i16 and 3_3_0_0..., but the operands are <16 x i32> and
; the mask actually used is 2,3,0,1,...; name kept as-is, confirm which was intended.
define <16 x i32> @shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12:
; ALL:       # %bb.0:
; ALL-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; ALL-NEXT:    retq
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i32> %c
}

; Per lane: the low two elements come from %a and the high two from %b (two-source
; in-lane shuffle).
define <16 x float> @shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_00_01_10_10_04_05_14_14_08_09_18_18_0c_0d_1c_1c:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[0,1],zmm1[0,0],zmm0[4,5],zmm1[4,4],zmm0[8,9],zmm1[8,8],zmm0[12,13],zmm1[12,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
  ret <16 x float> %shuffle
}

; A scalar loaded from memory placed in element 0 with all other elements zero; expected
; to be a single zero-extending scalar load.
define <16 x i32> @insert_mem_and_zero_v16i32(i32* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    retq
  %a = load i32, i32* %ptr
  %v = insertelement <16 x i32> undef, i32 %a, i32 0
  %shuffle = shufflevector <16 x i32> %v, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %shuffle
}


; Keep element 0 of %a and zero every other element (integer).
define <16 x i32> @shuffle_v16i32_0zzzzzzzzzzzzzzz(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_0zzzzzzzzzzzzzzz:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i32> %shuffle
}

; Keep element 0 of %a and zero every other element (float).
define <16 x float> @shuffle_v16f32_0zzzzzzzzzzzzzzz(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_0zzzzzzzzzzzzzzz:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x float> %shuffle
}

; The low eight elements of %a interleaved with zeros; expected to become a 32->64-bit
; zero extension.
define <16 x i32> @shuffle_v16i32_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
  ret <16 x i32> %shuffle
}

; Whole-vector shift by one element across the %a:%b concatenation (element align).
define <16 x i32> @shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
; ALL:       # %bb.0:
; ALL-NEXT:    valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ret <16 x i32> %shuffle
}

; Rotation of %a by one element (single-input element align).
define <16 x i32> @shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i32> %a) {
; ALL-LABEL: shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; ALL:       # %bb.0:
; ALL-NEXT:    valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
  ret <16 x i32> %shuffle
}

; Per lane: elements 0 and 3 of %a followed by elements 0 and 3 of %b.
define <16 x i32> @shuffle_v16i32_00_03_16_19_04_07_20_23_08_11_24_27_12_15_28_31(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_03_16_19_04_07_20_23_08_11_24_27_12_15_28_31:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,3],zmm0[4,7],zmm1[4,7],zmm0[8,11],zmm1[8,11],zmm0[12,15],zmm1[12,15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 3, i32 16, i32 19, i32 4, i32 7, i32 20, i32 23, i32 8, i32 11, i32 24, i32 27, i32 12, i32 15, i32 28, i32 31>
  ret <16 x i32> %shuffle
}

; Per lane: element 0 of %b twice, then elements 2 and 3 of %a; the last two mask
; elements are undef. Expected to match with the operands commuted.
define <16 x i32> @shuffle_v16i32_16_16_02_03_20_20_06_07_24_24_10_11_28_28_uu_uu(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_16_16_02_03_20_20_06_07_24_24_10_11_28_28_uu_uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} zmm0 = zmm1[0,0],zmm0[2,3],zmm1[4,4],zmm0[6,7],zmm1[8,8],zmm0[10,11],zmm1[12,12],zmm0[14,15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 16, i32 16, i32 02, i32 03, i32 20, i32 20, i32 06, i32 07, i32 24, i32 24, i32 10, i32 11, i32 28, i32 28, i32 undef, i32 undef>
  ret <16 x i32> %shuffle
}

; Per lane: elements 1 and 0 of %b followed by elements 1 and 0 of %a.
; NOTE(review): the name says v8i32 but the operands are <16 x i32>; name kept as-is.
define <16 x i32> @shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[1,0],zmm1[5,4],zmm0[5,4],zmm1[9,8],zmm0[9,8],zmm1[13,12],zmm0[13,12]
; ALL-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 17, i32 16, i32 01, i32 00, i32 21, i32 20, i32 05, i32 04, i32 25, i32 24, i32 09, i32 08, i32 29, i32 28, i32 13, i32 12>
  ret <16 x i32> %shuffle
}

; Widening splat: element 4 of an <8 x float> broadcast to all sixteen result elements.
define <16 x float> @shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<8 x float> %a) {
; ALL-LABEL: shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
; ALL:       # %bb.0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <16 x float> %shuffle
}

; Rotate-by-two of %a selected against %passthru under an i16 mask; the select is expected
; to fold into a merge-masked element align. The configurations differ only in the mask
; move instruction.
define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, <16 x i32> %passthru, i16 %mask) {
; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
  ret <16 x i32> %res
}

; Two-input shift-by-two across %a:%b selected against %passthru under an i16 mask
; (merge-masked element align).
define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) {
; AVX512F-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
  ret <16 x i32> %res
}

; Rotate-by-two of %a selected against zero under an i16 mask (zero-masked element align).
define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, i16 %mask) {
; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; Two-input shift-by-two across %a:%b selected against zero under an i16 mask
; (zero-masked element align).
define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; AVX512F-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; 128-bit-lane shuffle: the low 256 bits of %x followed by lanes 1 and 0 of %x1 (float).
define <16 x float> @test_vshuff32x4_512(<16 x float> %x, <16 x float> %x1) nounwind {
; ALL-LABEL: test_vshuff32x4_512:
; ALL:       # %bb.0:
; ALL-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; ALL-NEXT:    retq
  %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
  ret <16 x float> %res
}

; Integer version of the 128-bit-lane shuffle above.
define <16 x i32> @test_vshufi32x4_512(<16 x i32> %x, <16 x i32> %x1) nounwind {
; ALL-LABEL: test_vshufi32x4_512:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; ALL-NEXT:    retq
  %res = shufflevector <16 x i32> %x, <16 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
  ret <16 x i32> %res
}

; The float lane shuffle selected against %y under a <16 x i1> vector mask. The
; configurations differ in how the mask argument is converted to a mask register.
define <16 x float> @test_vshuff32x4_512_mask(<16 x float> %x, <16 x float> %x1, <16 x float> %y, <16 x i1> %mask) nounwind {
; AVX512F-LABEL: test_vshuff32x4_512_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vpslld $31, %zmm3, %zmm3
; AVX512F-NEXT:    vpmovd2m %zmm3, %k1
; AVX512F-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; AVX512F-NEXT:    vmovaps %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: test_vshuff32x4_512_mask:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %xmm3, %xmm3
; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
; AVX512BW-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
; AVX512BW-NEXT:    retq
  %x2 = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
  %res = select <16 x i1> %mask, <16 x float> %x2, <16 x float> %y
  ret <16 x float> %res
}

; Integer version of the vector-masked lane shuffle above.
define <16 x i32> @test_vshufi32x4_512_mask(<16 x i32> %x, <16 x i32> %x1, <16 x i32> %y, <16 x i1> %mask) nounwind {
; AVX512F-LABEL: test_vshufi32x4_512_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vpslld $31, %zmm3, %zmm3
; AVX512F-NEXT:    vpmovd2m %zmm3, %k1
; AVX512F-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; AVX512F-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: test_vshufi32x4_512_mask:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %xmm3, %xmm3
; AVX512BW-NEXT:    vpmovb2m %zmm3, %k1
; AVX512BW-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    retq
  %x2 = shufflevector <16 x i32> %x, <16 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
  %res = select <16 x i1> %mask, <16 x i32> %x2, <16 x i32> %y
  ret <16 x i32> %res
}

; Low half of %a concatenated with low half of %b, selected against %passthru under an
; i16 mask; expected to become a merge-masked 256-bit insert.
define <16 x float> @mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) {
; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; AVX512F-NEXT:    vmovaps %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru
  ret <16 x float> %res
}

; %a with its second 128-bit lane replaced by the low lane of %b, selected against
; %passthru under an i16 mask; expected to become a merge-masked 128-bit insert.
define <16 x float> @mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) {
; AVX512F-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
; AVX512F-NEXT:    vmovaps %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
; AVX512BW-NEXT:    retq
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.cast, <16 x float> %shuffle, <16 x float> %passthru
  ret <16 x float> %res
}

define <16 x i32>
@mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) { 650 ; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: 651 ; AVX512F: # %bb.0: 652 ; AVX512F-NEXT: kmovw %edi, %k1 653 ; AVX512F-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} 654 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 655 ; AVX512F-NEXT: retq 656 ; 657 ; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21_22_23: 658 ; AVX512BW: # %bb.0: 659 ; AVX512BW-NEXT: kmovd %edi, %k1 660 ; AVX512BW-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} 661 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 662 ; AVX512BW-NEXT: retq 663 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 664 %mask.cast = bitcast i16 %mask to <16 x i1> 665 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru 666 ret <16 x i32> %res 667 } 668 669 define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) { 670 ; AVX512F-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: 671 ; AVX512F: # %bb.0: 672 ; AVX512F-NEXT: kmovw %edi, %k1 673 ; AVX512F-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 674 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 675 ; AVX512F-NEXT: retq 676 ; 677 ; AVX512BW-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15: 678 ; AVX512BW: # %bb.0: 679 ; AVX512BW-NEXT: kmovd %edi, %k1 680 ; AVX512BW-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} 681 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 682 ; AVX512BW-NEXT: retq 683 %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 684 
%mask.cast = bitcast i16 %mask to <16 x i1> 685 %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru 686 ret <16 x i32> %res 687 } 688 689 define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x i32> %a) { 690 ; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03: 691 ; ALL: # %bb.0: 692 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 693 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 694 ; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 695 ; ALL-NEXT: retq 696 %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 697 ret <16 x i32> %res 698 } 699 700 define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x float> %a) { 701 ; ALL-LABEL: mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03: 702 ; ALL: # %bb.0: 703 ; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 704 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 705 ; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 706 ; ALL-NEXT: retq 707 %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 708 ret <16 x float> %res 709 } 710