; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

target triple = "x86_64-unknown-unknown"

; Each test below checks lowering of a single-source <16 x i16> shuffle.
; The mask in the function name lists, per output lane, the source lane index
; (0-15 = %a, 16-31 = %b). AVX1 lowers per 128-bit half via pshufb/insertf128;
; AVX2 can use ymm-wide pshufb, broadcasts, and lane blends.

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0,
%xmm2 562 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 563 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 564 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7] 565 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 566 ; AVX1-NEXT: retq 567 ; 568 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15: 569 ; AVX2: # BB#0: 570 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 571 ; AVX2-NEXT: retq 572 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 573 ret <16 x i16> %shuffle 574 } 575 576 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) { 577 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31: 578 ; AVX1: # BB#0: 579 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 580 ; AVX1-NEXT: retq 581 ; 582 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31: 583 ; AVX2: # BB#0: 584 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 585 ; AVX2-NEXT: retq 586 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 587 ret <16 x i16> %shuffle 588 } 589 590 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 591 ; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: 592 ; AVX1: # BB#0: 593 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3] 594 ; 
AVX1-NEXT: retq 595 ; 596 ; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: 597 ; AVX2: # BB#0: 598 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 599 ; AVX2-NEXT: retq 600 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15> 601 ret <16 x i16> %shuffle 602 } 603 604 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) { 605 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: 606 ; AVX1: # BB#0: 607 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 608 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 609 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7] 610 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 611 ; AVX1-NEXT: retq 612 ; 613 ; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: 614 ; AVX2: # BB#0: 615 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0] 616 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 617 ; AVX2-NEXT: retq 618 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31> 619 ret <16 x i16> %shuffle 620 } 621 622 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 623 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: 624 ; AVX1: # BB#0: 625 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7] 626 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 627 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 628 ; AVX1-NEXT: retq 629 ; 630 ; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: 
631 ; AVX2: # BB#0: 632 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 633 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 634 ; AVX2-NEXT: retq 635 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 636 ret <16 x i16> %shuffle 637 } 638 639 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) { 640 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: 641 ; AVX1: # BB#0: 642 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 643 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 644 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 645 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 646 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 647 ; AVX1-NEXT: retq 648 ; 649 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: 650 ; AVX2: # BB#0: 651 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] 652 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 653 ; AVX2-NEXT: retq 654 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 655 ret <16 x i16> %shuffle 656 } 657 658 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) { 659 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 660 ; AVX1: # BB#0: 661 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 662 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 663 ; AVX1-NEXT: vpblendw {{.*#+}} 
xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 664 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7] 665 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 666 ; AVX1-NEXT: retq 667 ; 668 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 669 ; AVX2: # BB#0: 670 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0] 671 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 672 ; AVX2-NEXT: retq 673 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> 674 ret <16 x i16> %shuffle 675 } 676 677 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) { 678 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31: 679 ; AVX1: # BB#0: 680 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7] 681 ; AVX1-NEXT: retq 682 ; 683 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31: 684 ; AVX2: # BB#0: 685 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7] 686 ; AVX2-NEXT: retq 687 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 688 ret <16 x i16> %shuffle 689 } 690 691 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) { 692 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 693 ; AVX1: # BB#0: 694 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 695 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 696 ; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm0, %ymm0 697 ; AVX1-NEXT: retq 698 ; 699 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 700 ; AVX2: # BB#0: 701 ; AVX2-NEXT: vpbroadcastw %xmm1, %ymm1 702 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 703 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 704 ; AVX2-NEXT: retq 705 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16> 706 ret <16 x i16> %shuffle 707 } 708 709 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) { 710 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 711 ; AVX1: # BB#0: 712 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 713 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 714 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 715 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 716 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 717 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 718 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 719 ; AVX1-NEXT: retq 720 ; 721 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 722 ; AVX2: # BB#0: 723 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17] 724 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 725 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 726 ; AVX2-NEXT: retq 727 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, 
i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24> 728 ret <16 x i16> %shuffle 729 } 730 731 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 732 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 733 ; AVX1: # BB#0: 734 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 735 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 736 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 737 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7] 738 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 739 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 740 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 741 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 742 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 743 ; AVX1-NEXT: retq 744 ; 745 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 746 ; AVX2: # BB#0: 747 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 748 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 749 ; AVX2-NEXT: retq 750 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15> 751 ret <16 x i16> %shuffle 752 } 753 754 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) { 755 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 756 ; AVX1: # BB#0: 757 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 758 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] 759 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 760 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] 761 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7] 762 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 763 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = 
xmm1[3,2,1,0,4,5,6,7] 764 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 765 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 766 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 767 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 768 ; AVX1-NEXT: retq 769 ; 770 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 771 ; AVX2: # BB#0: 772 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 773 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 774 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] 775 ; AVX2-NEXT: retq 776 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12> 777 ret <16 x i16> %shuffle 778 } 779 780 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) { 781 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 782 ; AVX1: # BB#0: 783 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 784 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 785 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 786 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3] 787 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 788 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 789 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 791 ; AVX1-NEXT: retq 792 ; 793 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 794 ; AVX2: # BB#0: 795 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 796 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 797 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = 
ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] 798 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 799 ; AVX2-NEXT: retq 800 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8> 801 ret <16 x i16> %shuffle 802 } 803 804 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) { 805 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 806 ; AVX1: # BB#0: 807 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 808 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 809 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 810 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 811 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 812 ; AVX1-NEXT: retq 813 ; 814 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 815 ; AVX2: # BB#0: 816 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17] 817 ; AVX2-NEXT: retq 818 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8> 819 ret <16 x i16> %shuffle 820 } 821 822 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) { 823 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 824 ; AVX1: # BB#0: 825 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 826 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 827 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 828 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 829 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 830 ; AVX1-NEXT: retq 831 ; 832 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 833 ; AVX2: # BB#0: 834 ; AVX2-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17] 835 ; AVX2-NEXT: retq 836 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8> 837 ret <16 x i16> %shuffle 838 } 839 840 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) { 841 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 842 ; AVX1: # BB#0: 843 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 844 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 845 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 846 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 847 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 848 ; AVX1-NEXT: retq 849 ; 850 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 851 ; AVX2: # BB#0: 852 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17] 853 ; AVX2-NEXT: retq 854 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8> 855 ret <16 x i16> %shuffle 856 } 857 858 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 859 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 860 ; AVX1: # BB#0: 861 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 862 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 863 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 864 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 865 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 866 ; AVX1-NEXT: retq 867 ; 868 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 869 ; AVX2: # BB#0: 870 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17] 871 ; AVX2-NEXT: retq 872 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8> 873 ret <16 x i16> %shuffle 874 } 875 876 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 877 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 878 ; AVX1: # BB#0: 879 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 880 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 881 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 882 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 883 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 884 ; AVX1-NEXT: retq 885 ; 886 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 887 ; AVX2: # BB#0: 888 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17] 889 ; AVX2-NEXT: retq 890 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8> 891 ret <16 x i16> %shuffle 892 } 893 894 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 895 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 896 ; AVX1: # BB#0: 897 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 898 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 899 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 900 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 901 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 902 ; AVX1-NEXT: retq 903 ; 904 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 905 ; AVX2: # BB#0: 906 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17] 907 ; AVX2-NEXT: retq 908 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 909 ret <16 x i16> %shuffle 910 } 911 912 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 913 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 914 ; AVX1: # BB#0: 915 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 916 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 917 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 918 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 919 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 920 ; AVX1-NEXT: retq 921 ; 922 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 923 ; AVX2: # BB#0: 924 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 925 ; AVX2-NEXT: retq 926 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 927 ret <16 x i16> %shuffle 928 } 929 930 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 931 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 932 ; AVX1: # BB#0: 933 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 934 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 935 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 936 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 937 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 938 ; AVX1-NEXT: retq 939 ; 940 ; AVX2-LABEL: 
shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 941 ; AVX2: # BB#0: 942 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 943 ; AVX2-NEXT: retq 944 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 945 ret <16 x i16> %shuffle 946 } 947 948 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 949 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 950 ; AVX1: # BB#0: 951 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 952 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 953 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 954 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 955 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 956 ; AVX1-NEXT: retq 957 ; 958 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 959 ; AVX2: # BB#0: 960 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 961 ; AVX2-NEXT: retq 962 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 963 ret <16 x i16> %shuffle 964 } 965 966 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 967 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 968 ; AVX1: # BB#0: 969 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 970 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 971 ; AVX1-NEXT: 
vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 972 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 973 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 974 ; AVX1-NEXT: retq 975 ; 976 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 977 ; AVX2: # BB#0: 978 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31] 979 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u] 980 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 981 ; AVX2-NEXT: retq 982 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 983 ret <16 x i16> %shuffle 984 } 985 986 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 987 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 988 ; AVX1: # BB#0: 989 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 990 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 991 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 992 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 993 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 994 ; AVX1-NEXT: retq 995 ; 996 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 997 ; AVX2: # BB#0: 998 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] 999 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u] 1000 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1001 ; AVX2-NEXT: retq 1002 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 1003 ret <16 x i16> %shuffle 1004 } 1005 1006 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1007 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1008 ; AVX1: # BB#0: 1009 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 1010 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1011 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1] 1012 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1013 ; AVX1-NEXT: retq 1014 ; 1015 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1016 ; AVX2: # BB#0: 1017 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17] 1018 ; AVX2-NEXT: retq 1019 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1020 ret <16 x i16> %shuffle 1021 } 1022 1023 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1024 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1025 ; AVX1: # BB#0: 1026 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 1027 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1028 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1] 1029 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 
1030 ; AVX1-NEXT: retq 1031 ; 1032 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1033 ; AVX2: # BB#0: 1034 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17] 1035 ; AVX2-NEXT: retq 1036 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8> 1037 ret <16 x i16> %shuffle 1038 } 1039 1040 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1041 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1042 ; AVX1: # BB#0: 1043 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 1044 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1045 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1] 1046 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1047 ; AVX1-NEXT: retq 1048 ; 1049 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1050 ; AVX2: # BB#0: 1051 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17] 1052 ; AVX2-NEXT: retq 1053 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8> 1054 ret <16 x i16> %shuffle 1055 } 1056 1057 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1058 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1059 ; AVX1: # BB#0: 1060 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 1061 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1062 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1] 1063 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1064 ; 
AVX1-NEXT: retq 1065 ; 1066 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1067 ; AVX2: # BB#0: 1068 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17] 1069 ; AVX2-NEXT: retq 1070 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8> 1071 ret <16 x i16> %shuffle 1072 } 1073 1074 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) { 1075 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1076 ; AVX1: # BB#0: 1077 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 1078 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1079 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1] 1080 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1081 ; AVX1-NEXT: retq 1082 ; 1083 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1084 ; AVX2: # BB#0: 1085 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17] 1086 ; AVX2-NEXT: retq 1087 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8> 1088 ret <16 x i16> %shuffle 1089 } 1090 1091 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1092 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1093 ; AVX1: # BB#0: 1094 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 1095 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1096 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1] 1097 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1098 
; AVX1-NEXT: retq 1099 ; 1100 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1101 ; AVX2: # BB#0: 1102 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17] 1103 ; AVX2-NEXT: retq 1104 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8> 1105 ret <16 x i16> %shuffle 1106 } 1107 1108 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1109 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1110 ; AVX1: # BB#0: 1111 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1113 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15] 1114 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1115 ; AVX1-NEXT: retq 1116 ; 1117 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1118 ; AVX2: # BB#0: 1119 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31] 1120 ; AVX2-NEXT: retq 1121 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15> 1122 ret <16 x i16> %shuffle 1123 } 1124 1125 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1126 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1127 ; AVX1: # BB#0: 1128 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7] 1129 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 1130 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1131 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1] 1132 
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1133 ; AVX1-NEXT: retq 1134 ; 1135 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1136 ; AVX2: # BB#0: 1137 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17] 1138 ; AVX2-NEXT: retq 1139 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8> 1140 ret <16 x i16> %shuffle 1141 } 1142 1143 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1144 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1145 ; AVX1: # BB#0: 1146 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 1147 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1148 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1149 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1150 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1151 ; AVX1-NEXT: retq 1152 ; 1153 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1154 ; AVX2: # BB#0: 1155 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1156 ; AVX2-NEXT: retq 1157 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 1158 ret <16 x i16> %shuffle 1159 } 1160 1161 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1162 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1163 ; AVX1: # BB#0: 1164 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1] 1165 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1166 ; AVX1-NEXT: vpshufb 
{{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1] 1167 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1168 ; AVX1-NEXT: retq 1169 ; 1170 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1171 ; AVX2: # BB#0: 1172 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17] 1173 ; AVX2-NEXT: retq 1174 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8> 1175 ret <16 x i16> %shuffle 1176 } 1177 1178 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1179 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1180 ; AVX1: # BB#0: 1181 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1] 1182 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1183 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15] 1184 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1185 ; AVX1-NEXT: retq 1186 ; 1187 ; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1188 ; AVX2: # BB#0: 1189 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31] 1190 ; AVX2-NEXT: retq 1191 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15> 1192 ret <16 x i16> %shuffle 1193 } 1194 1195 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1196 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1197 ; AVX1: # BB#0: 1198 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7] 1199 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 
1200 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1201 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1] 1202 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1203 ; AVX1-NEXT: retq 1204 ; 1205 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1206 ; AVX2: # BB#0: 1207 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17] 1208 ; AVX2-NEXT: retq 1209 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8> 1210 ret <16 x i16> %shuffle 1211 } 1212 1213 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1214 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1215 ; AVX1: # BB#0: 1216 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3] 1217 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1218 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1219 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7] 1220 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1221 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1222 ; AVX1-NEXT: retq 1223 ; 1224 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1225 ; AVX2: # BB#0: 1226 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25] 1227 ; AVX2-NEXT: retq 1228 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12> 1229 ret <16 x i16> %shuffle 1230 } 1231 1232 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1233 ; AVX1-LABEL: 
shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1234 ; AVX1: # BB#0: 1235 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1236 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1237 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1238 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1239 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1240 ; AVX1-NEXT: retq 1241 ; 1242 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1243 ; AVX2: # BB#0: 1244 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1245 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1246 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1247 ; AVX2-NEXT: retq 1248 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1249 ret <16 x i16> %shuffle 1250 } 1251 1252 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1253 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1254 ; AVX1: # BB#0: 1255 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1256 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1257 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1258 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1259 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1260 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1261 ; AVX1-NEXT: retq 1262 ; 1263 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1264 ; AVX2: # BB#0: 1265 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1266 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1267 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1268 ; AVX2-NEXT: retq 1269 
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1270 ret <16 x i16> %shuffle 1271 } 1272 1273 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1274 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1275 ; AVX1: # BB#0: 1276 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1277 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1278 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1279 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1280 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1281 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1282 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1283 ; AVX1-NEXT: retq 1284 ; 1285 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1286 ; AVX2: # BB#0: 1287 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1288 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1289 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1290 ; AVX2-NEXT: retq 1291 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1292 ret <16 x i16> %shuffle 1293 } 1294 1295 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1296 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1297 ; AVX1: # BB#0: 1298 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1299 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1300 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1301 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1302 ; AVX1-NEXT: vpshufhw {{.*#+}} 
xmm1 = xmm1[0,1,2,3,4,4,4,4] 1303 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1304 ; AVX1-NEXT: retq 1305 ; 1306 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1307 ; AVX2: # BB#0: 1308 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1309 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1310 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1311 ; AVX2-NEXT: retq 1312 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1313 ret <16 x i16> %shuffle 1314 } 1315 1316 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) { 1317 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1318 ; AVX1: # BB#0: 1319 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1320 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1321 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1322 ; AVX1-NEXT: retq 1323 ; 1324 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1325 ; AVX2: # BB#0: 1326 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1327 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1328 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1329 ; AVX2-NEXT: retq 1330 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1331 ret <16 x i16> %shuffle 1332 } 1333 1334 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) { 1335 ; AVX1-LABEL: 
shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1336 ; AVX1: # BB#0: 1337 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1338 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1339 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1340 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1341 ; AVX1-NEXT: retq 1342 ; 1343 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1344 ; AVX2: # BB#0: 1345 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] 1346 ; AVX2-NEXT: retq 1347 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24> 1348 ret <16 x i16> %shuffle 1349 } 1350 1351 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) { 1352 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz: 1353 ; AVX1: # BB#0: 1354 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1355 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1356 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1357 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1358 ; AVX1-NEXT: retq 1359 ; 1360 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz: 1361 ; AVX2: # BB#0: 1362 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero 1363 ; AVX2-NEXT: retq 1364 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, 
i32 27, i32 28, i32 29, i32 30, i32 31, i32 0> 1365 ret <16 x i16> %shuffle 1366 } 1367 1368 ; 1369 ; Shuffle to logical bit shifts 1370 ; 1371 1372 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) { 1373 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1374 ; AVX1: # BB#0: 1375 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1 1376 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1377 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm0 1378 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1379 ; AVX1-NEXT: retq 1380 ; 1381 ; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1382 ; AVX2: # BB#0: 1383 ; AVX2-NEXT: vpslld $16, %ymm0, %ymm0 1384 ; AVX2-NEXT: retq 1385 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14> 1386 ret <16 x i16> %shuffle 1387 } 1388 1389 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) { 1390 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1391 ; AVX1: # BB#0: 1392 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1 1393 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1394 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0 1395 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1396 ; AVX1-NEXT: retq 1397 ; 1398 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1399 ; AVX2: # BB#0: 1400 ; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 1401 ; AVX2-NEXT: retq 1402 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12> 1403 ret <16 x i16> %shuffle 1404 } 1405 1406 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) { 1407 ; AVX1-LABEL: 
shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz: 1408 ; AVX1: # BB#0: 1409 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1410 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1411 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 1412 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1413 ; AVX1-NEXT: retq 1414 ; 1415 ; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz: 1416 ; AVX2: # BB#0: 1417 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 1418 ; AVX2-NEXT: retq 1419 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16> 1420 ret <16 x i16> %shuffle 1421 } 1422 1423 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) { 1424 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz: 1425 ; AVX1: # BB#0: 1426 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1 1427 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 1428 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1429 ; AVX1-NEXT: retq 1430 ; 1431 ; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz: 1432 ; AVX2: # BB#0: 1433 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 1434 ; AVX2-NEXT: retq 1435 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16> 1436 ret <16 x i16> %shuffle 1437 } 1438 1439 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) { 1440 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz: 1441 ; AVX1: # BB#0: 1442 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7] 1443 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1444 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] 1445 ; 
AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1446 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1447 ; AVX1-NEXT: retq 1448 ; 1449 ; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz: 1450 ; AVX2: # BB#0: 1451 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1452 ; AVX2-NEXT: retq 1453 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0> 1454 ret <16 x i16> %shuffle 1455 } 1456
; NOTE(review): renamed from ..._22_zz_22_zz — the mask pairs are <16,zz> ... <23,zz>, so the
; final pair selects element 23, not a repeated 22. Labels renamed with the define to keep
; FileCheck's CHECK-LABEL lines in sync with the symbol llc emits.
1457 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz(<16 x i16> %a) { 1458 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz: 1459 ; AVX1: # BB#0: 1460 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1461 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1462 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1463 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1464 ; AVX1-NEXT: retq 1465 ; 1466 ; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_23_zz: 1467 ; AVX2: # BB#0: 1468 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1469 ; AVX2-NEXT: retq 1470 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0> 1471 ret <16 x i16> %shuffle 1472 } 1473 1474 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) { 1475 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14: 1476 ; AVX1: # BB#0: 1477 ; AVX1-NEXT: vextractf128 $1, %ymm1,
%xmm2 1478 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1479 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1480 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1481 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1482 ; AVX1-NEXT: retq 1483 ; 1484 ; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14: 1485 ; AVX2: # BB#0: 1486 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1487 ; AVX2-NEXT: retq 1488 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 1489 ret <16 x i16> %shuffle 1490 } 1491 1492 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) { 1493 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24: 1494 ; AVX1: # BB#0: 1495 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1496 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1497 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1] 1498 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1] 1499 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1500 ; AVX1-NEXT: retq 1501 ; 1502 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24: 1503 ; AVX2: # BB#0: 1504 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17] 1505 ; AVX2-NEXT: retq 1506 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24> 1507 ret <16 x i16> %shuffle 1508 } 1509 1510 define <16 x i16> 
@shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_08(<16 x i16> %a, <16 x i16> %b) {
; NOTE(review): tail renamed _8 -> _08 to match the file's two-digit index naming (mask element
; 15 is 8); mask literal normalized from "i32 00" to "i32 0" (same value). Labels renamed with
; the define so FileCheck's CHECK-LABEL lines stay in sync with the symbol llc emits.
1511 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_08: 1512 ; AVX1: # BB#0: 1513 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1514 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1515 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1] 1516 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 1517 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1518 ; AVX1-NEXT: retq 1519 ; 1520 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_08: 1521 ; AVX2: # BB#0: 1522 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 1523 ; AVX2-NEXT: retq 1524 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8> 1525 ret <16 x i16> %shuffle 1526 } 1527 1528 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) { 1529 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 1530 ; AVX1: # BB#0: 1531 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1532 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1533 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1534 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1535 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1536 ; AVX1-NEXT: retq 1537 ; 1538 ; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 1539 ; AVX2: # BB#0: 1540 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1541 ; AVX2-NEXT: retq 1542 %shuffle = shufflevector <16 x i16> %a,
<16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 1543 ret <16 x i16> %shuffle 1544 } 1545 1546 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) { 1547 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 1548 ; AVX1: # BB#0: 1549 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 1550 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 1551 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1552 ; AVX1-NEXT: retq 1553 ; 1554 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 1555 ; AVX2: # BB#0: 1556 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1557 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17] 1558 ; AVX2-NEXT: retq 1559 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16> 1560 ret <16 x i16> %shuffle 1561 } 1562 1563 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) { 1564 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 1565 ; AVX1: # BB#0: 1566 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1567 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1568 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1569 ; AVX1-NEXT: retq 1570 ; 1571 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 1572 ; AVX2: # BB#0: 1573 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1574 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29] 
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
  ret <16 x i16> %shuffle
}

; NOTE(review): this chunk was recovered from a whitespace-mangled extraction
; (original line numbers had been fused into the text); formatting below is
; reconstructed, tokens are unchanged. The CHECK lines follow the
; update_llc_test_checks.py layout — presumably machine-generated, so prefer
; regenerating them with that script rather than hand-editing.
; Function names encode the shuffle mask: one two-digit index per result
; element; indices 00-15 select from %a, 16-31 from %b, "uu" = undef.

define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: the AVX1 and AVX2 lowerings differ structurally here
; (AVX1 works on extracted 128-bit halves; AVX2 starts with a ymm vpblendd).
define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
  ret <16 x i16> %shuffle
}

; Splat of element 0 per half (with element 7 taken from the high half);
; AVX2 uses vpbroadcastw for the high-half splat where AVX1 needs vpshufb.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i16> %shuffle
}

; Element 7 is the high half's word 0: AVX1 materializes it with vpslldq,
; AVX2 with vpbroadcastw.
define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
  ret <16 x i16> %shuffle
}

; Here the pre-shuffle blend is dword-granular on AVX2 (vpblendd) vs
; word-granular on AVX1 (vpblendw).
define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}
2305 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2306 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu: 2307 ; AVX1: # BB#0: 2308 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2309 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] 2310 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2311 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2312 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2313 ; AVX1-NEXT: retq 2314 ; 2315 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu: 2316 ; AVX2: # BB#0: 2317 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31] 2318 ; AVX2-NEXT: retq 2319 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef> 2320 ret <16 x i16> %shuffle 2321 } 2322 2323 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) { 2324 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: 2325 ; AVX1: # BB#0: 2326 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2327 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] 2328 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2329 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 2330 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2331 ; AVX1-NEXT: retq 2332 ; 2333 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: 2334 ; AVX2: # BB#0: 2335 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2336 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2 2337 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2338 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 2339 ; AVX2-NEXT: 
vinserti128 $1, %xmm1, %ymm0, %ymm0 2340 ; AVX2-NEXT: retq 2341 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11> 2342 ret <16 x i16> %shuffle 2343 } 2344 2345 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2346 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu: 2347 ; AVX1: # BB#0: 2348 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2349 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] 2350 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2351 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2352 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2353 ; AVX1-NEXT: retq 2354 ; 2355 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu: 2356 ; AVX2: # BB#0: 2357 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19] 2358 ; AVX2-NEXT: retq 2359 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 2360 ret <16 x i16> %shuffle 2361 } 2362 2363 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) { 2364 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11: 2365 ; AVX1: # BB#0: 2366 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2367 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 2368 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2369 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2370 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2371 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2372 ; AVX1-NEXT: retq 2373 ; 2374 ; AVX2-LABEL: 
shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11: 2375 ; AVX2: # BB#0: 2376 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2377 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 2378 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2379 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2380 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2381 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2382 ; AVX2-NEXT: retq 2383 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11> 2384 ret <16 x i16> %shuffle 2385 } 2386 2387 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) { 2388 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15: 2389 ; AVX1: # BB#0: 2390 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2391 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3] 2392 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2393 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 2394 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2395 ; AVX1-NEXT: retq 2396 ; 2397 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15: 2398 ; AVX2: # BB#0: 2399 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2400 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3] 2401 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2402 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 2403 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2404 ; AVX2-NEXT: retq 2405 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15> 2406 ret <16 x i16> %shuffle 2407 } 2408 2409 
define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) { 2410 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 2411 ; AVX1: # BB#0: 2412 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2413 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 2414 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2415 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 2416 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2417 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2418 ; AVX1-NEXT: retq 2419 ; 2420 ; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 2421 ; AVX2: # BB#0: 2422 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2423 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 2424 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2425 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 2426 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2427 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2428 ; AVX2-NEXT: retq 2429 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13> 2430 ret <16 x i16> %shuffle 2431 } 2432 2433 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 2434 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: 2435 ; AVX1: # BB#0: 2436 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2437 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2438 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2439 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2440 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 2441 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2442 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2443 ; AVX1-NEXT: retq 2444 ; 2445 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: 2446 ; AVX2: # BB#0: 2447 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] 2448 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2449 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2450 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7] 2451 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7] 2452 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] 2453 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2454 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2455 ; AVX2-NEXT: retq 2456 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 2457 ret <16 x i16> %shuffle 2458 } 2459 2460 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) { 2461 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: 2462 ; AVX1: # BB#0: 2463 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2464 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2465 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1] 2466 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 2467 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2468 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 2469 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2470 ; AVX1-NEXT: 
vinsertf128 $1, %xmm2, %ymm0, %ymm0 2471 ; AVX1-NEXT: retq 2472 ; 2473 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: 2474 ; AVX2: # BB#0: 2475 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2476 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15] 2477 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2 2478 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2479 ; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7] 2480 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2481 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 2482 ; AVX2-NEXT: retq 2483 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31> 2484 ret <16 x i16> %shuffle 2485 } 2486 2487 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 2488 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: 2489 ; AVX1: # BB#0: 2490 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2491 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2492 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2493 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2494 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 2495 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2496 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2497 ; AVX1-NEXT: retq 2498 ; 2499 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: 2500 ; AVX2: # BB#0: 2501 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] 2502 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2503 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = 
xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2504 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7] 2505 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7] 2506 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7] 2507 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2508 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2509 ; AVX2-NEXT: retq 2510 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 2511 ret <16 x i16> %shuffle 2512 } 2513 2514 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) { 2515 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: 2516 ; AVX1: # BB#0: 2517 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2518 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2519 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] 2520 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2521 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2522 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 2523 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2524 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2525 ; AVX1-NEXT: retq 2526 ; 2527 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: 2528 ; AVX2: # BB#0: 2529 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2530 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2531 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2532 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = 
[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7] 2533 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2534 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2535 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2536 ; AVX2-NEXT: retq 2537 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27> 2538 ret <16 x i16> %shuffle 2539 } 2540 2541 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 2542 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 2543 ; AVX1: # BB#0: 2544 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2545 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3] 2546 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2547 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] 2548 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 2549 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7] 2550 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 2551 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] 2552 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15] 2553 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 2554 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2555 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2556 ; AVX1-NEXT: retq 2557 ; 2558 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 2559 ; AVX2: # BB#0: 2560 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2561 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7] 2562 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 2563 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3] 2564 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7] 2565 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm2 = 
xmm2[0,1,2,3,4,6,6,7] 2566 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2567 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 2568 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] 2569 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2570 ; AVX2-NEXT: retq 2571 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31> 2572 ret <16 x i16> %shuffle 2573 } 2574 2575 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) { 2576 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 2577 ; AVX1: # BB#0: 2578 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2579 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3] 2580 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2581 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] 2582 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 2583 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7] 2584 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3] 2585 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 2586 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2587 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2588 ; AVX1-NEXT: retq 2589 ; 2590 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 2591 ; AVX2: # BB#0: 2592 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2593 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3] 2594 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4 2595 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7] 2596 ; 
AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 2597 ; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 2598 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 2599 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] 2600 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2601 ; AVX2-NEXT: retq 2602 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25> 2603 ret <16 x i16> %shuffle 2604 } 2605 2606 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) { 2607 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 2608 ; AVX1: # BB#0: 2609 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2610 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2611 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11] 2612 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7] 2613 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 2614 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2615 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2616 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 2617 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 2618 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2619 ; AVX1-NEXT: retq 2620 ; 2621 ; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 2622 ; AVX2: # BB#0: 2623 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2624 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2625 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = 
xmm1[0,1,2,0,4,5,6,7] 2626 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5] 2627 ; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5] 2628 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2629 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23] 2630 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 2631 ; AVX2-NEXT: retq 2632 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26> 2633 ret <16 x i16> %shuffle 2634 } 2635 2636 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) { 2637 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: 2638 ; AVX1: # BB#0: 2639 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2640 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2641 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2642 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2643 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 2644 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2645 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2646 ; AVX1-NEXT: retq 2647 ; 2648 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: 2649 ; AVX2: # BB#0: 2650 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] 2651 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 2652 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2653 ; AVX2-NEXT: vpshuflw 
{{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 2654 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 2655 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3] 2656 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 2657 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 2658 ; AVX2-NEXT: retq 2659 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11> 2660 ret <16 x i16> %shuffle 2661 } 2662 2663 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) { 2664 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: 2665 ; AVX1: # BB#0: 2666 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2667 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2668 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2669 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2670 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 2671 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2672 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2673 ; AVX1-NEXT: retq 2674 ; 2675 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: 2676 ; AVX2: # BB#0: 2677 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15] 2678 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 2679 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2680 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 2681 ; AVX2-NEXT: 
vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 2682 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7] 2683 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 2684 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 2685 ; AVX2-NEXT: retq 2686 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15> 2687 ret <16 x i16> %shuffle 2688 } 2689 2690 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) { 2691 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 2692 ; AVX1: # BB#0: 2693 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2694 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7] 2695 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2696 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] 2697 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7] 2698 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] 2699 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2700 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7] 2701 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 2702 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2703 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2704 ; AVX1-NEXT: retq 2705 ; 2706 ; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 2707 ; AVX2: # BB#0: 2708 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2709 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2710 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 2711 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] 2712 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1,2,3,4,5,6],xmm1[7] 2713 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 2714 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] 2715 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2716 ; AVX2-NEXT: retq 2717 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31> 2718 ret <16 x i16> %shuffle 2719 } 2720 2721 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2722 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 2723 ; AVX1: # BB#0: 2724 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 2725 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] 2726 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7] 2727 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2728 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2729 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 2730 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 2731 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 2732 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 2733 ; AVX1-NEXT: retq 2734 ; 2735 ; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 2736 ; AVX2: # BB#0: 2737 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15] 2738 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7] 2739 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15] 2740 ; AVX2-NEXT: retq 2741 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef> 2742 ret <16 x i16> %shuffle 2743 } 2744 2745 define <16 x i16> 
@shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2746 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: 2747 ; AVX1: # BB#0: 2748 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2749 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2750 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 2751 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 2752 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 2753 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 2754 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 2755 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2756 ; AVX1-NEXT: retq 2757 ; 2758 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: 2759 ; AVX2: # BB#0: 2760 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2761 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19] 2762 ; AVX2-NEXT: retq 2763 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 2764 ret <16 x i16> %shuffle 2765 } 2766 2767 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2768 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu: 2769 ; AVX1: # BB#0: 2770 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7] 2771 ; AVX1-NEXT: retq 2772 ; 2773 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu: 2774 ; AVX2: # BB#0: 2775 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7] 2776 ; AVX2-NEXT: retq 2777 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 2778 ret <16 x i16> %shuffle 2779 } 2780 2781 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2782 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: 2783 ; AVX1: # BB#0: 2784 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2785 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2786 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] 2787 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7] 2788 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 2789 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2791 ; AVX1-NEXT: retq 2792 ; 2793 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: 2794 ; AVX2: # BB#0: 2795 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 2796 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15] 2797 ; AVX2-NEXT: retq 2798 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 2799 ret <16 x i16> %shuffle 2800 } 2801 2802 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) { 2803 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: 2804 ; AVX1: # BB#0: 2805 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2806 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2807 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2808 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7] 2809 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 2810 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2811 ; AVX1-NEXT: retq 2812 ; 2813 ; AVX2-LABEL: 
shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: 2814 ; AVX2: # BB#0: 2815 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2] 2816 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] 2817 ; AVX2-NEXT: retq 2818 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11> 2819 ret <16 x i16> %shuffle 2820 } 2821 2822 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2823 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: 2824 ; AVX1: # BB#0: 2825 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2826 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2827 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] 2828 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7] 2829 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 2830 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 2831 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2832 ; AVX1-NEXT: retq 2833 ; 2834 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: 2835 ; AVX2: # BB#0: 2836 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] 2837 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15] 2838 ; AVX2-NEXT: retq 2839 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 2840 ret <16 x i16> %shuffle 2841 } 2842 2843 define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) { 2844 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: 2845 ; AVX1: # BB#0: 2846 ; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2847 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2848 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3] 2849 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7] 2850 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2851 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 2852 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 2853 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2854 ; AVX1-NEXT: retq 2855 ; 2856 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: 2857 ; AVX2: # BB#0: 2858 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2859 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2860 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2861 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 2862 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2863 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2864 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2865 ; AVX2-NEXT: retq 2866 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11> 2867 ret <16 x i16> %shuffle 2868 } 2869 2870 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) { 2871 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: 2872 ; AVX1: # BB#0: 2873 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2874 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2875 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7] 2876 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 2877 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 2878 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2879 ; AVX1-NEXT: retq 2880 ; 2881 ; AVX2-LABEL: 
shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: 2882 ; AVX2: # BB#0: 2883 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3] 2884 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15] 2885 ; AVX2-NEXT: retq 2886 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15> 2887 ret <16 x i16> %shuffle 2888 } 2889 2890 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) { 2891 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 2892 ; AVX1: # BB#0: 2893 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2894 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 2895 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2896 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7] 2897 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7] 2898 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7] 2899 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 2900 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 2901 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2902 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2903 ; AVX1-NEXT: retq 2904 ; 2905 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 2906 ; AVX2: # BB#0: 2907 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 2908 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 2909 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15] 2910 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15] 2911 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] 2912 ; AVX2-NEXT: retq 2913 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 
undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25> 2914 ret <16 x i16> %shuffle 2915 } 2916 2917 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) { 2918 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: 2919 ; AVX1: # BB#0: 2920 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2921 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 2922 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 2923 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2924 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] 2925 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] 2926 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 2927 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 2928 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 2929 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2930 ; AVX1-NEXT: retq 2931 ; 2932 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: 2933 ; AVX2: # BB#0: 2934 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21] 2935 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2936 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2937 ; AVX2-NEXT: retq 2938 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef> 2939 ret <16 x i16> %shuffle 2940 } 2941 2942 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) { 2943 ; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: 2944 ; AVX1: # BB#0: 2945 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2946 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2947 ; AVX1-NEXT: vpalignr 
{{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 2948 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 2949 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 2950 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2951 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7] 2952 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2953 ; AVX1-NEXT: retq 2954 ; 2955 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: 2956 ; AVX2: # BB#0: 2957 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15] 2958 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2959 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 2960 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2961 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2962 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2963 ; AVX2-NEXT: retq 2964 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12> 2965 ret <16 x i16> %shuffle 2966 } 2967 2968 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 2969 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 2970 ; AVX1: # BB#0: 2971 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2972 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2973 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 2974 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 2975 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2976 ; AVX1-NEXT: retq 2977 ; 2978 ; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 
2979 ; AVX2: # BB#0: 2980 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25] 2981 ; AVX2-NEXT: retq 2982 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 2983 ret <16 x i16> %shuffle 2984 } 2985 2986 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) { 2987 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 2988 ; AVX1: # BB#0: 2989 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2990 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 2991 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2992 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2993 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2994 ; AVX1-NEXT: retq 2995 ; 2996 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 2997 ; AVX2: # BB#0: 2998 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2999 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3000 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3001 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3002 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3003 ; AVX2-NEXT: retq 3004 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12> 3005 ret <16 x i16> %shuffle 3006 } 3007 3008 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3009 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3010 ; AVX1: # BB#0: 
3011 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3012 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3013 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3014 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3015 ; AVX1-NEXT: retq 3016 ; 3017 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3018 ; AVX2: # BB#0: 3019 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25] 3020 ; AVX2-NEXT: retq 3021 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3022 ret <16 x i16> %shuffle 3023 } 3024 3025 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3026 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3027 ; AVX1: # BB#0: 3028 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3029 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3030 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3031 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3032 ; AVX1-NEXT: retq 3033 ; 3034 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3035 ; AVX2: # BB#0: 3036 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25] 3037 ; AVX2-NEXT: retq 3038 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3039 ret <16 x i16> %shuffle 3040 } 3041 3042 define <16 x i16> 
@shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) { 3043 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3044 ; AVX1: # BB#0: 3045 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3046 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3047 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3048 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 3049 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 3050 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3051 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7] 3052 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3053 ; AVX1-NEXT: retq 3054 ; 3055 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3056 ; AVX2: # BB#0: 3057 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15] 3058 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3059 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3060 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3061 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3062 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3063 ; AVX2-NEXT: retq 3064 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10> 3065 ret <16 x i16> %shuffle 3066 } 3067 3068 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 3069 ; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 3070 ; AVX1: # BB#0: 3071 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3072 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3073 ; AVX1-NEXT: 
vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3074 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 3075 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3076 ; AVX1-NEXT: retq 3077 ; 3078 ; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 3079 ; AVX2: # BB#0: 3080 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21] 3081 ; AVX2-NEXT: retq 3082 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef> 3083 ret <16 x i16> %shuffle 3084 } 3085 3086 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) { 3087 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 3088 ; AVX1: # BB#0: 3089 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3090 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3091 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3092 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3093 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3094 ; AVX1-NEXT: retq 3095 ; 3096 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 3097 ; AVX2: # BB#0: 3098 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3099 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3100 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3101 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3102 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3103 ; AVX2-NEXT: retq 3104 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 
1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10> 3105 ret <16 x i16> %shuffle 3106 } 3107 3108 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 3109 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 3110 ; AVX1: # BB#0: 3111 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3113 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3114 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3115 ; AVX1-NEXT: retq 3116 ; 3117 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 3118 ; AVX2: # BB#0: 3119 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21] 3120 ; AVX2-NEXT: retq 3121 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef> 3122 ret <16 x i16> %shuffle 3123 } 3124 3125 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3126 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 3127 ; AVX1: # BB#0: 3128 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3129 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3130 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3131 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3132 ; AVX1-NEXT: retq 3133 ; 3134 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 3135 ; AVX2: # BB#0: 3136 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = 
ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero 3137 ; AVX2-NEXT: retq 3138 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef> 3139 ret <16 x i16> %shuffle 3140 } 3141 3142 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) { 3143 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 3144 ; AVX1: # BB#0: 3145 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3146 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3147 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3148 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3149 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 3150 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7] 3152 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3153 ; AVX1-NEXT: retq 3154 ; 3155 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 3156 ; AVX2: # BB#0: 3157 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15] 3158 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3159 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3160 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3161 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3162 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3163 ; AVX2-NEXT: retq 3164 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, 
i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26> 3165 ret <16 x i16> %shuffle 3166 } 3167 3168 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) { 3169 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 3170 ; AVX1: # BB#0: 3171 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3172 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3173 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3174 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 3175 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3176 ; AVX1-NEXT: retq 3177 ; 3178 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 3179 ; AVX2: # BB#0: 3180 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21] 3181 ; AVX2-NEXT: retq 3182 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef> 3183 ret <16 x i16> %shuffle 3184 } 3185 3186 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) { 3187 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 3188 ; AVX1: # BB#0: 3189 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3190 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3191 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3192 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 3193 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 3194 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3195 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1,2],xmm1[3,4,5,6,7] 3196 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3197 ; AVX1-NEXT: retq 3198 ; 3199 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 3200 ; AVX2: # BB#0: 3201 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15] 3202 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3203 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3204 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3205 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3206 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3207 ; AVX2-NEXT: retq 3208 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28> 3209 ret <16 x i16> %shuffle 3210 } 3211 3212 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) { 3213 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 3214 ; AVX1: # BB#0: 3215 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3216 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3217 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3218 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 3219 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3220 ; AVX1-NEXT: retq 3221 ; 3222 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 3223 ; AVX2: # BB#0: 3224 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25] 3225 ; AVX2-NEXT: retq 3226 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, 
i32 26, i32 27, i32 undef> 3227 ret <16 x i16> %shuffle 3228 } 3229 3230 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) { 3231 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 3232 ; AVX1: # BB#0: 3233 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3234 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 3235 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3236 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4] 3237 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3238 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 3239 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 3240 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3241 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3242 ; AVX1-NEXT: retq 3243 ; 3244 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 3245 ; AVX2: # BB#0: 3246 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15] 3247 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31] 3248 ; AVX2-NEXT: retq 3249 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef> 3250 ret <16 x i16> %shuffle 3251 } 3252 3253 define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) { 3254 ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector: 3255 ; ALL: # BB#0: 3256 ; ALL-NEXT: movzwl (%rdi), %eax 3257 ; ALL-NEXT: vmovd %eax, %xmm0 3258 ; ALL-NEXT: retq 3259 %val = load i16, i16* %ptr 3260 %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0 3261 ret <16 x i16> %i0 3262 } 3263 3264