; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

; Lowering tests for <16 x i16> shufflevector patterns on AVX1 vs AVX2.
; Do not edit the AVX1/AVX2 CHECK lines by hand; regenerate them with
; utils/update_llc_test_checks.py after any codegen change.

target triple = "x86_64-unknown-unknown"

; --- Broadcast of element 0 across all 16 lanes. ---
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; --- Mostly-broadcast shuffles with one odd lane taken from the LOW
; --- 128-bit half (indices 1..7); the odd lane moves through the mask.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; --- Mostly-broadcast shuffles whose odd lane comes from the HIGH
; --- 128-bit half (indices 8..15), forcing a cross-lane move.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; --- Per-half broadcasts and splat-style repeats (no cross-lane moves). ---
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

; --- Shuffles whose 128-bit halves use the SAME in-lane pattern, so a
; --- single xmm shuffle + 128-bit insert suffices on both targets.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; --- Two-input interleaves (lane-wise blends of %a and %b). ---
define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0,
%xmm2 563 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 564 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 565 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7] 566 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 567 ; AVX1-NEXT: retq 568 ; 569 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15: 570 ; AVX2: # BB#0: 571 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 572 ; AVX2-NEXT: retq 573 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 574 ret <16 x i16> %shuffle 575 } 576 577 define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) { 578 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31: 579 ; AVX1: # BB#0: 580 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 581 ; AVX1-NEXT: retq 582 ; 583 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31: 584 ; AVX2: # BB#0: 585 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 586 ; AVX2-NEXT: retq 587 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 588 ret <16 x i16> %shuffle 589 } 590 591 define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 592 ; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: 593 ; AVX1: # BB#0: 594 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3] 595 ; 
AVX1-NEXT: retq 596 ; 597 ; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: 598 ; AVX2: # BB#0: 599 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 600 ; AVX2-NEXT: retq 601 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15> 602 ret <16 x i16> %shuffle 603 } 604 605 define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) { 606 ; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: 607 ; AVX1: # BB#0: 608 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 609 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 610 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7] 611 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 612 ; AVX1-NEXT: retq 613 ; 614 ; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: 615 ; AVX2: # BB#0: 616 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0] 617 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 618 ; AVX2-NEXT: retq 619 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31> 620 ret <16 x i16> %shuffle 621 } 622 623 define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 624 ; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: 625 ; AVX1: # BB#0: 626 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7] 627 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 628 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 629 ; AVX1-NEXT: retq 630 ; 631 ; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: 
632 ; AVX2: # BB#0: 633 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 634 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 635 ; AVX2-NEXT: retq 636 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 637 ret <16 x i16> %shuffle 638 } 639 640 define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) { 641 ; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: 642 ; AVX1: # BB#0: 643 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 644 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 645 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 646 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 647 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 648 ; AVX1-NEXT: retq 649 ; 650 ; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: 651 ; AVX2: # BB#0: 652 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] 653 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 654 ; AVX2-NEXT: retq 655 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 656 ret <16 x i16> %shuffle 657 } 658 659 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) { 660 ; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 661 ; AVX1: # BB#0: 662 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 664 ; AVX1-NEXT: vpblendw {{.*#+}} 
xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 665 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7] 666 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 667 ; AVX1-NEXT: retq 668 ; 669 ; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 670 ; AVX2: # BB#0: 671 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0] 672 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 673 ; AVX2-NEXT: retq 674 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> 675 ret <16 x i16> %shuffle 676 } 677 678 define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) { 679 ; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31: 680 ; AVX1: # BB#0: 681 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7] 682 ; AVX1-NEXT: retq 683 ; 684 ; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31: 685 ; AVX2: # BB#0: 686 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7] 687 ; AVX2-NEXT: retq 688 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 689 ret <16 x i16> %shuffle 690 } 691 692 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) { 693 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 694 ; AVX1: # BB#0: 695 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 696 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 697 ; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm0, %ymm0 698 ; AVX1-NEXT: retq 699 ; 700 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 701 ; AVX2: # BB#0: 702 ; AVX2-NEXT: vpbroadcastw %xmm1, %ymm1 703 ; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 704 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 705 ; AVX2-NEXT: retq 706 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16> 707 ret <16 x i16> %shuffle 708 } 709 710 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) { 711 ; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 712 ; AVX1: # BB#0: 713 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 714 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 715 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 716 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 717 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 718 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 719 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 720 ; AVX1-NEXT: retq 721 ; 722 ; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 723 ; AVX2: # BB#0: 724 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17] 725 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 726 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 727 ; AVX2-NEXT: retq 728 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, 
i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24> 729 ret <16 x i16> %shuffle 730 } 731 732 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 733 ; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 734 ; AVX1: # BB#0: 735 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 736 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 737 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 738 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7] 739 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 740 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 741 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 742 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 743 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 744 ; AVX1-NEXT: retq 745 ; 746 ; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 747 ; AVX2: # BB#0: 748 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 749 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 750 ; AVX2-NEXT: retq 751 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15> 752 ret <16 x i16> %shuffle 753 } 754 755 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) { 756 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 757 ; AVX1: # BB#0: 758 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 759 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] 760 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 761 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] 762 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7] 763 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 764 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = 
xmm1[3,2,1,0,4,5,6,7] 765 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 766 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 767 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 768 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 769 ; AVX1-NEXT: retq 770 ; 771 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 772 ; AVX2: # BB#0: 773 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 774 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 775 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] 776 ; AVX2-NEXT: retq 777 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12> 778 ret <16 x i16> %shuffle 779 } 780 781 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) { 782 ; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 783 ; AVX1: # BB#0: 784 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 785 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 786 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 787 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3] 788 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 789 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 790 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 791 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 792 ; AVX1-NEXT: retq 793 ; 794 ; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 795 ; AVX2: # BB#0: 796 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 797 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 798 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = 
ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] 799 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 800 ; AVX2-NEXT: retq 801 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8> 802 ret <16 x i16> %shuffle 803 } 804 805 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) { 806 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 807 ; AVX1: # BB#0: 808 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 809 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 810 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 811 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 812 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 813 ; AVX1-NEXT: retq 814 ; 815 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 816 ; AVX2: # BB#0: 817 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17] 818 ; AVX2-NEXT: retq 819 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8> 820 ret <16 x i16> %shuffle 821 } 822 823 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) { 824 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 825 ; AVX1: # BB#0: 826 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 827 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 828 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 829 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 830 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 831 ; AVX1-NEXT: retq 832 ; 833 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 834 ; AVX2: # BB#0: 835 ; AVX2-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17] 836 ; AVX2-NEXT: retq 837 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8> 838 ret <16 x i16> %shuffle 839 } 840 841 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) { 842 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 843 ; AVX1: # BB#0: 844 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 845 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 846 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 847 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 848 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 849 ; AVX1-NEXT: retq 850 ; 851 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 852 ; AVX2: # BB#0: 853 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17] 854 ; AVX2-NEXT: retq 855 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8> 856 ret <16 x i16> %shuffle 857 } 858 859 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 860 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 861 ; AVX1: # BB#0: 862 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 863 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 864 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 865 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 866 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 867 ; AVX1-NEXT: retq 868 ; 869 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 870 ; AVX2: # BB#0: 871 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17] 872 ; AVX2-NEXT: retq 873 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8> 874 ret <16 x i16> %shuffle 875 } 876 877 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 878 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 879 ; AVX1: # BB#0: 880 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 881 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 882 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 883 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 884 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 885 ; AVX1-NEXT: retq 886 ; 887 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 888 ; AVX2: # BB#0: 889 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17] 890 ; AVX2-NEXT: retq 891 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8> 892 ret <16 x i16> %shuffle 893 } 894 895 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 896 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 897 ; AVX1: # BB#0: 898 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 899 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 900 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 901 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 902 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 903 ; AVX1-NEXT: retq 904 ; 905 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 906 ; AVX2: # BB#0: 907 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17] 908 ; AVX2-NEXT: retq 909 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 910 ret <16 x i16> %shuffle 911 } 912 913 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 914 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 915 ; AVX1: # BB#0: 916 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 917 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 918 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 919 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 920 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 921 ; AVX1-NEXT: retq 922 ; 923 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 924 ; AVX2: # BB#0: 925 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 926 ; AVX2-NEXT: retq 927 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 928 ret <16 x i16> %shuffle 929 } 930 931 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 932 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 933 ; AVX1: # BB#0: 934 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 935 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 936 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 937 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 938 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 939 ; AVX1-NEXT: retq 940 ; 941 ; AVX2-LABEL: 
shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 942 ; AVX2: # BB#0: 943 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 944 ; AVX2-NEXT: retq 945 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 946 ret <16 x i16> %shuffle 947 } 948 949 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 950 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 951 ; AVX1: # BB#0: 952 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 953 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 954 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 955 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 956 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 957 ; AVX1-NEXT: retq 958 ; 959 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 960 ; AVX2: # BB#0: 961 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 962 ; AVX2-NEXT: retq 963 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 964 ret <16 x i16> %shuffle 965 } 966 967 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 968 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 969 ; AVX1: # BB#0: 970 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 971 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 972 ; AVX1-NEXT: 
vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 973 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 974 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 975 ; AVX1-NEXT: retq 976 ; 977 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 978 ; AVX2: # BB#0: 979 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31] 980 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u] 981 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 982 ; AVX2-NEXT: retq 983 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 984 ret <16 x i16> %shuffle 985 } 986 987 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 988 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 989 ; AVX1: # BB#0: 990 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 991 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 992 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 993 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 994 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 995 ; AVX1-NEXT: retq 996 ; 997 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 998 ; AVX2: # BB#0: 999 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] 1000 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u] 1001 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1002 ; AVX2-NEXT: retq 1003 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 1004 ret <16 x i16> %shuffle 1005 } 1006 1007 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1008 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1009 ; AVX1: # BB#0: 1010 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 1011 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1012 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1] 1013 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1014 ; AVX1-NEXT: retq 1015 ; 1016 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1017 ; AVX2: # BB#0: 1018 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17] 1019 ; AVX2-NEXT: retq 1020 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1021 ret <16 x i16> %shuffle 1022 } 1023 1024 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1025 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1026 ; AVX1: # BB#0: 1027 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 1028 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1029 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1] 1030 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 
1031 ; AVX1-NEXT: retq 1032 ; 1033 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1034 ; AVX2: # BB#0: 1035 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17] 1036 ; AVX2-NEXT: retq 1037 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8> 1038 ret <16 x i16> %shuffle 1039 } 1040 1041 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1042 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1043 ; AVX1: # BB#0: 1044 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 1045 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1046 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1] 1047 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1048 ; AVX1-NEXT: retq 1049 ; 1050 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1051 ; AVX2: # BB#0: 1052 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17] 1053 ; AVX2-NEXT: retq 1054 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8> 1055 ret <16 x i16> %shuffle 1056 } 1057 1058 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1059 ; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1060 ; AVX1: # BB#0: 1061 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 1062 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1063 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1] 1064 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1065 ; 
AVX1-NEXT: retq 1066 ; 1067 ; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1068 ; AVX2: # BB#0: 1069 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17] 1070 ; AVX2-NEXT: retq 1071 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8> 1072 ret <16 x i16> %shuffle 1073 } 1074 1075 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) { 1076 ; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1077 ; AVX1: # BB#0: 1078 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 1079 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1080 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1] 1081 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1082 ; AVX1-NEXT: retq 1083 ; 1084 ; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1085 ; AVX2: # BB#0: 1086 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17] 1087 ; AVX2-NEXT: retq 1088 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8> 1089 ret <16 x i16> %shuffle 1090 } 1091 1092 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1093 ; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1094 ; AVX1: # BB#0: 1095 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 1096 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1097 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1] 1098 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1099 
; AVX1-NEXT: retq 1100 ; 1101 ; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1102 ; AVX2: # BB#0: 1103 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17] 1104 ; AVX2-NEXT: retq 1105 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8> 1106 ret <16 x i16> %shuffle 1107 } 1108 1109 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1110 ; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1111 ; AVX1: # BB#0: 1112 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1113 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1114 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15] 1115 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1116 ; AVX1-NEXT: retq 1117 ; 1118 ; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1119 ; AVX2: # BB#0: 1120 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31] 1121 ; AVX2-NEXT: retq 1122 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15> 1123 ret <16 x i16> %shuffle 1124 } 1125 1126 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1127 ; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1128 ; AVX1: # BB#0: 1129 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7] 1130 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 1131 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1132 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1] 1133 
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1134 ; AVX1-NEXT: retq 1135 ; 1136 ; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1137 ; AVX2: # BB#0: 1138 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17] 1139 ; AVX2-NEXT: retq 1140 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8> 1141 ret <16 x i16> %shuffle 1142 } 1143 1144 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1145 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1146 ; AVX1: # BB#0: 1147 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 1148 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1149 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1150 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1151 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1152 ; AVX1-NEXT: retq 1153 ; 1154 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1155 ; AVX2: # BB#0: 1156 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1157 ; AVX2-NEXT: retq 1158 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 1159 ret <16 x i16> %shuffle 1160 } 1161 1162 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1163 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1164 ; AVX1: # BB#0: 1165 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1] 1166 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1167 ; AVX1-NEXT: vpshufb 
{{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1] 1168 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1169 ; AVX1-NEXT: retq 1170 ; 1171 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1172 ; AVX2: # BB#0: 1173 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17] 1174 ; AVX2-NEXT: retq 1175 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8> 1176 ret <16 x i16> %shuffle 1177 } 1178 1179 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1180 ; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1181 ; AVX1: # BB#0: 1182 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1] 1183 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1184 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15] 1185 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1186 ; AVX1-NEXT: retq 1187 ; 1188 ; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1189 ; AVX2: # BB#0: 1190 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31] 1191 ; AVX2-NEXT: retq 1192 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15> 1193 ret <16 x i16> %shuffle 1194 } 1195 1196 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1197 ; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1198 ; AVX1: # BB#0: 1199 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7] 1200 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 
1201 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1202 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1] 1203 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1204 ; AVX1-NEXT: retq 1205 ; 1206 ; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1207 ; AVX2: # BB#0: 1208 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17] 1209 ; AVX2-NEXT: retq 1210 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8> 1211 ret <16 x i16> %shuffle 1212 } 1213 1214 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1215 ; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1216 ; AVX1: # BB#0: 1217 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3] 1218 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1219 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1220 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7] 1221 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1222 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1223 ; AVX1-NEXT: retq 1224 ; 1225 ; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1226 ; AVX2: # BB#0: 1227 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25] 1228 ; AVX2-NEXT: retq 1229 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12> 1230 ret <16 x i16> %shuffle 1231 } 1232 1233 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1234 ; AVX1-LABEL: 
shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1235 ; AVX1: # BB#0: 1236 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1237 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1238 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1239 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1240 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1241 ; AVX1-NEXT: retq 1242 ; 1243 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1244 ; AVX2: # BB#0: 1245 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1246 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1247 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1248 ; AVX2-NEXT: retq 1249 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1250 ret <16 x i16> %shuffle 1251 } 1252 1253 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1254 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1255 ; AVX1: # BB#0: 1256 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1257 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1258 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1259 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1260 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1261 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1262 ; AVX1-NEXT: retq 1263 ; 1264 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1265 ; AVX2: # BB#0: 1266 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1267 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1268 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1269 ; AVX2-NEXT: retq 1270 
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1271 ret <16 x i16> %shuffle 1272 } 1273 1274 define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1275 ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1276 ; AVX1: # BB#0: 1277 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1278 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1279 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1280 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1281 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1282 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1283 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1284 ; AVX1-NEXT: retq 1285 ; 1286 ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1287 ; AVX2: # BB#0: 1288 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1289 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1290 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1291 ; AVX2-NEXT: retq 1292 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1293 ret <16 x i16> %shuffle 1294 } 1295 1296 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1297 ; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1298 ; AVX1: # BB#0: 1299 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1300 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1301 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1302 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1303 ; AVX1-NEXT: vpshufhw {{.*#+}} 
xmm1 = xmm1[0,1,2,3,4,4,4,4] 1304 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1305 ; AVX1-NEXT: retq 1306 ; 1307 ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1308 ; AVX2: # BB#0: 1309 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1310 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1311 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1312 ; AVX2-NEXT: retq 1313 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1314 ret <16 x i16> %shuffle 1315 } 1316 1317 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) { 1318 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1319 ; AVX1: # BB#0: 1320 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1321 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1322 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1323 ; AVX1-NEXT: retq 1324 ; 1325 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1326 ; AVX2: # BB#0: 1327 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1328 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1329 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1330 ; AVX2-NEXT: retq 1331 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1332 ret <16 x i16> %shuffle 1333 } 1334 1335 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) { 1336 ; AVX1-LABEL: 
shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1337 ; AVX1: # BB#0: 1338 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1339 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1340 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1341 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1342 ; AVX1-NEXT: retq 1343 ; 1344 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1345 ; AVX2: # BB#0: 1346 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] 1347 ; AVX2-NEXT: retq 1348 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24> 1349 ret <16 x i16> %shuffle 1350 } 1351 1352 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) { 1353 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz: 1354 ; AVX1: # BB#0: 1355 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1356 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1357 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1358 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1359 ; AVX1-NEXT: retq 1360 ; 1361 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz: 1362 ; AVX2: # BB#0: 1363 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero 1364 ; AVX2-NEXT: retq 1365 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, 
i32 27, i32 28, i32 29, i32 30, i32 31, i32 0> 1366 ret <16 x i16> %shuffle 1367 } 1368 1369 ; 1370 ; Shuffle to logical bit shifts 1371 ; 1372 1373 define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) { 1374 ; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1375 ; AVX1: # BB#0: 1376 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1 1377 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1378 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm0 1379 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1380 ; AVX1-NEXT: retq 1381 ; 1382 ; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1383 ; AVX2: # BB#0: 1384 ; AVX2-NEXT: vpslld $16, %ymm0, %ymm0 1385 ; AVX2-NEXT: retq 1386 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14> 1387 ret <16 x i16> %shuffle 1388 } 1389 1390 define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) { 1391 ; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1392 ; AVX1: # BB#0: 1393 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1 1394 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1395 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0 1396 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1397 ; AVX1-NEXT: retq 1398 ; 1399 ; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1400 ; AVX2: # BB#0: 1401 ; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 1402 ; AVX2-NEXT: retq 1403 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12> 1404 ret <16 x i16> %shuffle 1405 } 1406 1407 define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) { 1408 ; AVX1-LABEL: 
shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz: 1409 ; AVX1: # BB#0: 1410 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1411 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1412 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 1413 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1414 ; AVX1-NEXT: retq 1415 ; 1416 ; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz: 1417 ; AVX2: # BB#0: 1418 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 1419 ; AVX2-NEXT: retq 1420 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16> 1421 ret <16 x i16> %shuffle 1422 } 1423 1424 define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) { 1425 ; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz: 1426 ; AVX1: # BB#0: 1427 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1 1428 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 1429 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1430 ; AVX1-NEXT: retq 1431 ; 1432 ; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz: 1433 ; AVX2: # BB#0: 1434 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 1435 ; AVX2-NEXT: retq 1436 %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16> 1437 ret <16 x i16> %shuffle 1438 } 1439 1440 define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) { 1441 ; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz: 1442 ; AVX1: # BB#0: 1443 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1444 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1445 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1446 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1447 ; AVX1-NEXT: retq 1448 ; 1449 ; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz: 1450 ; AVX2: # BB#0: 1451 ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1452 ; AVX2-NEXT: retq 1453 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0> 1454 ret <16 x i16> %shuffle 1455 } 1456 1457 define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) { 1458 ; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz: 1459 ; AVX1: # BB#0: 1460 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1461 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1462 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1463 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1464 ; AVX1-NEXT: retq 1465 ; 1466 ; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz: 1467 ; AVX2: # BB#0: 1468 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1469 ; AVX2-NEXT: retq 1470 %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0> 1471 ret <16 x i16> %shuffle 1472 } 1473 1474 define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) { 1475 ; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14: 1476 ; AVX1: # BB#0: 1477 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1478 ; AVX1-NEXT: vextractf128 
$1, %ymm0, %xmm3 1479 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1480 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1481 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1482 ; AVX1-NEXT: retq 1483 ; 1484 ; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14: 1485 ; AVX2: # BB#0: 1486 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1487 ; AVX2-NEXT: retq 1488 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 1489 ret <16 x i16> %shuffle 1490 } 1491 1492 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) { 1493 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24: 1494 ; AVX1: # BB#0: 1495 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1496 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1497 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1] 1498 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1] 1499 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1500 ; AVX1-NEXT: retq 1501 ; 1502 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24: 1503 ; AVX2: # BB#0: 1504 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17] 1505 ; AVX2-NEXT: retq 1506 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24> 1507 ret <16 x i16> %shuffle 1508 } 1509 1510 define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> 
%a, <16 x i16> %b) { 1511 ; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8: 1512 ; AVX1: # BB#0: 1513 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1514 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1515 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1] 1516 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] 1517 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1518 ; AVX1-NEXT: retq 1519 ; 1520 ; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8: 1521 ; AVX2: # BB#0: 1522 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] 1523 ; AVX2-NEXT: retq 1524 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8> 1525 ret <16 x i16> %shuffle 1526 } 1527 1528 define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) { 1529 ; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 1530 ; AVX1: # BB#0: 1531 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1532 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1533 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1534 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1535 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1536 ; AVX1-NEXT: retq 1537 ; 1538 ; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 1539 ; AVX2: # BB#0: 1540 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1541 ; AVX2-NEXT: retq 1542 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, 
i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 1543 ret <16 x i16> %shuffle 1544 } 1545 1546 define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) { 1547 ; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 1548 ; AVX1: # BB#0: 1549 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 1550 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 1551 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1552 ; AVX1-NEXT: retq 1553 ; 1554 ; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 1555 ; AVX2: # BB#0: 1556 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1557 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17] 1558 ; AVX2-NEXT: retq 1559 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16> 1560 ret <16 x i16> %shuffle 1561 } 1562 1563 define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) { 1564 ; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 1565 ; AVX1: # BB#0: 1566 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1567 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1568 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1569 ; AVX1-NEXT: retq 1570 ; 1571 ; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 1572 ; AVX2: # BB#0: 1573 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1574 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1575 ; AVX2-NEXT: retq 1576 %shuffle = shufflevector <16 x i16> %a, <16 x 
i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpsllq $48, %xmm1, %xmm2
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, 
i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12> 2302 ret <16 x i16> %shuffle 2303 } 2304 2305 define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2306 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu: 2307 ; AVX1: # BB#0: 2308 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2309 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15] 2310 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2311 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2312 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2313 ; AVX1-NEXT: retq 2314 ; 2315 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu: 2316 ; AVX2: # BB#0: 2317 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31] 2318 ; AVX2-NEXT: retq 2319 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef> 2320 ret <16 x i16> %shuffle 2321 } 2322 2323 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) { 2324 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: 2325 ; AVX1: # BB#0: 2326 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2327 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] 2328 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2329 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 2330 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2331 ; AVX1-NEXT: retq 2332 ; 2333 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: 2334 ; AVX2: # BB#0: 2335 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2336 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2 2337 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1,2,3,4,5,6],xmm2[7] 2338 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7] 2339 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2340 ; AVX2-NEXT: retq 2341 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11> 2342 ret <16 x i16> %shuffle 2343 } 2344 2345 define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2346 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu: 2347 ; AVX1: # BB#0: 2348 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2349 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3] 2350 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2351 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2352 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2353 ; AVX1-NEXT: retq 2354 ; 2355 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu: 2356 ; AVX2: # BB#0: 2357 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19] 2358 ; AVX2-NEXT: retq 2359 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 2360 ret <16 x i16> %shuffle 2361 } 2362 2363 define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) { 2364 ; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11: 2365 ; AVX1: # BB#0: 2366 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2367 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 2368 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2369 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2370 ; AVX1-NEXT: 
vpshufb %xmm2, %xmm0, %xmm0 2371 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2372 ; AVX1-NEXT: retq 2373 ; 2374 ; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11: 2375 ; AVX2: # BB#0: 2376 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2377 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7] 2378 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2379 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2380 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2381 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2382 ; AVX2-NEXT: retq 2383 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11> 2384 ret <16 x i16> %shuffle 2385 } 2386 2387 define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) { 2388 ; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15: 2389 ; AVX1: # BB#0: 2390 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2391 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3] 2392 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2393 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 2394 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2395 ; AVX1-NEXT: retq 2396 ; 2397 ; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15: 2398 ; AVX2: # BB#0: 2399 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2400 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3] 2401 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2402 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15] 2403 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2404 ; AVX2-NEXT: retq 2405 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, 
i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15> 2406 ret <16 x i16> %shuffle 2407 } 2408 2409 define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) { 2410 ; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 2411 ; AVX1: # BB#0: 2412 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2413 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 2414 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2415 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 2416 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2417 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2418 ; AVX1-NEXT: retq 2419 ; 2420 ; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: 2421 ; AVX2: # BB#0: 2422 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2423 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] 2424 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2425 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 2426 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2427 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2428 ; AVX2-NEXT: retq 2429 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13> 2430 ret <16 x i16> %shuffle 2431 } 2432 2433 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 2434 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: 2435 ; AVX1: # BB#0: 2436 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2437 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2438 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2439 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2440 ; AVX1-NEXT: vpshufb 
{{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 2441 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2442 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2443 ; AVX1-NEXT: retq 2444 ; 2445 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: 2446 ; AVX2: # BB#0: 2447 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2448 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2449 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2450 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7] 2451 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7] 2452 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 2453 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] 2454 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2455 ; AVX2-NEXT: retq 2456 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 2457 ret <16 x i16> %shuffle 2458 } 2459 2460 define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) { 2461 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: 2462 ; AVX1: # BB#0: 2463 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2464 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2465 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1] 2466 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 2467 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2468 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = 
xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 2469 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2470 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2471 ; AVX1-NEXT: retq 2472 ; 2473 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: 2474 ; AVX2: # BB#0: 2475 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2476 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15] 2477 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2 2478 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2479 ; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7] 2480 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2481 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 2482 ; AVX2-NEXT: retq 2483 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31> 2484 ret <16 x i16> %shuffle 2485 } 2486 2487 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 2488 ; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: 2489 ; AVX1: # BB#0: 2490 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2491 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2492 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2493 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2494 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 2495 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2496 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2497 ; AVX1-NEXT: retq 2498 ; 2499 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: 2500 ; AVX2: # BB#0: 2501 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2502 
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2503 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2504 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7] 2505 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7] 2506 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 2507 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] 2508 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2509 ; AVX2-NEXT: retq 2510 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 2511 ret <16 x i16> %shuffle 2512 } 2513 2514 define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) { 2515 ; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: 2516 ; AVX1: # BB#0: 2517 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2518 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2519 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] 2520 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2521 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2522 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 2523 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2524 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2525 ; AVX1-NEXT: retq 2526 ; 2527 ; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: 2528 ; AVX2: # BB#0: 2529 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2530 ; AVX2-NEXT: 
vextracti128 $1, %ymm0, %xmm1 2531 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2532 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7] 2533 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2534 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2535 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2536 ; AVX2-NEXT: retq 2537 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27> 2538 ret <16 x i16> %shuffle 2539 } 2540 2541 define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 2542 ; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 2543 ; AVX1: # BB#0: 2544 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2545 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3] 2546 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2547 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] 2548 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 2549 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7] 2550 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 2551 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] 2552 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15] 2553 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 2554 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2555 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2556 ; AVX1-NEXT: retq 2557 ; 2558 ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: 2559 ; AVX2: # BB#0: 2560 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2561 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7] 2562 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 2563 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 
= xmm1[0,1,2],xmm2[3] 2564 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7] 2565 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] 2566 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2567 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 2568 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] 2569 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2570 ; AVX2-NEXT: retq 2571 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31> 2572 ret <16 x i16> %shuffle 2573 } 2574 2575 define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) { 2576 ; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 2577 ; AVX1: # BB#0: 2578 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2579 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3] 2580 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2581 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3] 2582 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 2583 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7] 2584 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3] 2585 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 2586 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2587 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2588 ; AVX1-NEXT: retq 2589 ; 2590 ; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: 2591 ; AVX2: # BB#0: 2592 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2593 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3] 
2594 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4 2595 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7] 2596 ; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 2597 ; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1 2598 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 2599 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15] 2600 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2601 ; AVX2-NEXT: retq 2602 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25> 2603 ret <16 x i16> %shuffle 2604 } 2605 2606 define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) { 2607 ; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 2608 ; AVX1: # BB#0: 2609 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2610 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2611 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11] 2612 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7] 2613 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 2614 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2615 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2616 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 2617 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 2618 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2619 ; AVX1-NEXT: retq 2620 ; 2621 ; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: 2622 ; AVX2: # BB#0: 2623 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2624 ; AVX2-NEXT: vpunpcklwd 
{{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2625 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7] 2626 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5] 2627 ; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5] 2628 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 2629 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23] 2630 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 2631 ; AVX2-NEXT: retq 2632 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26> 2633 ret <16 x i16> %shuffle 2634 } 2635 2636 define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) { 2637 ; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: 2638 ; AVX1: # BB#0: 2639 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2640 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2641 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2642 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2643 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15] 2644 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2645 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2646 ; AVX1-NEXT: retq 2647 ; 2648 ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: 2649 ; AVX2: # BB#0: 2650 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 2651 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2652 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2653 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 2654 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 2655 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2656 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] 2657 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 2658 ; AVX2-NEXT: retq 2659 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11> 2660 ret <16 x i16> %shuffle 2661 } 2662 2663 define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) { 2664 ; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: 2665 ; AVX1: # BB#0: 2666 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2667 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2668 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2669 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2670 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15] 2671 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2672 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2673 ; AVX1-NEXT: retq 2674 ; 2675 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: 2676 ; AVX2: # BB#0: 2677 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 2678 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2679 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = 
xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2680 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 2681 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 2682 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2683 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15] 2684 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] 2685 ; AVX2-NEXT: retq 2686 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15> 2687 ret <16 x i16> %shuffle 2688 } 2689 2690 define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) { 2691 ; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 2692 ; AVX1: # BB#0: 2693 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2694 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7] 2695 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2696 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] 2697 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7] 2698 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] 2699 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2700 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7] 2701 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 2702 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2703 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2704 ; AVX1-NEXT: retq 2705 ; 2706 ; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: 2707 ; AVX2: # BB#0: 2708 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = 
ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2709 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2710 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 2711 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] 2712 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2713 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 2714 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7] 2715 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2716 ; AVX2-NEXT: retq 2717 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31> 2718 ret <16 x i16> %shuffle 2719 } 2720 2721 define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2722 ; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 2723 ; AVX1: # BB#0: 2724 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 2725 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] 2726 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7] 2727 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2728 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2729 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 2730 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 2731 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 2732 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 2733 ; AVX1-NEXT: retq 2734 ; 2735 ; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: 2736 ; AVX2: # BB#0: 2737 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15] 2738 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7] 2739 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15] 2740 ; AVX2-NEXT: retq 2741 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, 
i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef> 2742 ret <16 x i16> %shuffle 2743 } 2744 2745 define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2746 ; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: 2747 ; AVX1: # BB#0: 2748 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2749 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2750 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 2751 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 2752 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 2753 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 2754 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 2755 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2756 ; AVX1-NEXT: retq 2757 ; 2758 ; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: 2759 ; AVX2: # BB#0: 2760 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2761 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19] 2762 ; AVX2-NEXT: retq 2763 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 2764 ret <16 x i16> %shuffle 2765 } 2766 2767 define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2768 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu: 2769 ; AVX1: # BB#0: 2770 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7] 2771 ; AVX1-NEXT: retq 2772 ; 2773 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu: 2774 ; AVX2: # BB#0: 2775 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7] 2776 ; 
AVX2-NEXT: retq 2777 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 2778 ret <16 x i16> %shuffle 2779 } 2780 2781 define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2782 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: 2783 ; AVX1: # BB#0: 2784 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2785 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2786 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] 2787 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7] 2788 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 2789 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2791 ; AVX1-NEXT: retq 2792 ; 2793 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: 2794 ; AVX2: # BB#0: 2795 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 2796 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15] 2797 ; AVX2-NEXT: retq 2798 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef> 2799 ret <16 x i16> %shuffle 2800 } 2801 2802 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) { 2803 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: 2804 ; AVX1: # BB#0: 2805 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2806 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2807 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2808 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7] 2809 ; AVX1-NEXT: 
vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 2810 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2811 ; AVX1-NEXT: retq 2812 ; 2813 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: 2814 ; AVX2: # BB#0: 2815 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2] 2816 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] 2817 ; AVX2-NEXT: retq 2818 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11> 2819 ret <16 x i16> %shuffle 2820 } 2821 2822 define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 2823 ; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: 2824 ; AVX1: # BB#0: 2825 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2826 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2827 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] 2828 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7] 2829 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 2830 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 2831 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2832 ; AVX1-NEXT: retq 2833 ; 2834 ; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: 2835 ; AVX2: # BB#0: 2836 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] 2837 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15] 2838 ; AVX2-NEXT: retq 2839 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 2840 ret <16 x i16> %shuffle 2841 } 2842 2843 define <16 x i16> 
@shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) { 2844 ; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: 2845 ; AVX1: # BB#0: 2846 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2847 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2848 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3] 2849 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7] 2850 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2851 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 2852 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 2853 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2854 ; AVX1-NEXT: retq 2855 ; 2856 ; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: 2857 ; AVX2: # BB#0: 2858 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2859 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2860 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2861 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 2862 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2863 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2864 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2865 ; AVX2-NEXT: retq 2866 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11> 2867 ret <16 x i16> %shuffle 2868 } 2869 2870 define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) { 2871 ; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: 2872 ; AVX1: # BB#0: 2873 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2874 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2875 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7] 2876 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1,2,3],xmm3[4,5,6,7] 2877 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 2878 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2879 ; AVX1-NEXT: retq 2880 ; 2881 ; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: 2882 ; AVX2: # BB#0: 2883 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3] 2884 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15] 2885 ; AVX2-NEXT: retq 2886 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15> 2887 ret <16 x i16> %shuffle 2888 } 2889 2890 define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) { 2891 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 2892 ; AVX1: # BB#0: 2893 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2894 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 2895 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2896 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7] 2897 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7] 2898 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7] 2899 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 2900 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 2901 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2902 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2903 ; AVX1-NEXT: retq 2904 ; 2905 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: 2906 ; AVX2: # BB#0: 2907 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 2908 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 2909 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15] 2910 ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15] 2911 ; 
AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] 2912 ; AVX2-NEXT: retq 2913 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25> 2914 ret <16 x i16> %shuffle 2915 } 2916 2917 define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) { 2918 ; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: 2919 ; AVX1: # BB#0: 2920 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2921 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 2922 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 2923 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2924 ; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] 2925 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] 2926 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 2927 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 2928 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 2929 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2930 ; AVX1-NEXT: retq 2931 ; 2932 ; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: 2933 ; AVX2: # BB#0: 2934 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21] 2935 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2936 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 2937 ; AVX2-NEXT: retq 2938 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef> 2939 ret <16 x i16> %shuffle 2940 } 2941 2942 define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) { 2943 ; 
AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: 2944 ; AVX1: # BB#0: 2945 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2946 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2947 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 2948 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 2949 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 2950 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2951 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7] 2952 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2953 ; AVX1-NEXT: retq 2954 ; 2955 ; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: 2956 ; AVX2: # BB#0: 2957 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15] 2958 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2959 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 2960 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2961 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2962 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2963 ; AVX2-NEXT: retq 2964 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12> 2965 ret <16 x i16> %shuffle 2966 } 2967 2968 define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 2969 ; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 2970 ; AVX1: # BB#0: 2971 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2972 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2973 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 2974 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 
= xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 2975 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2976 ; AVX1-NEXT: retq 2977 ; 2978 ; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 2979 ; AVX2: # BB#0: 2980 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25] 2981 ; AVX2-NEXT: retq 2982 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 2983 ret <16 x i16> %shuffle 2984 } 2985 2986 define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) { 2987 ; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 2988 ; AVX1: # BB#0: 2989 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2990 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 2991 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2992 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2993 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2994 ; AVX1-NEXT: retq 2995 ; 2996 ; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 2997 ; AVX2: # BB#0: 2998 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2999 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3000 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3001 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3002 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3003 ; AVX2-NEXT: retq 3004 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12> 3005 ret <16 x i16> %shuffle 3006 } 3007 3008 
define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3009 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3010 ; AVX1: # BB#0: 3011 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3012 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3013 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3014 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3015 ; AVX1-NEXT: retq 3016 ; 3017 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3018 ; AVX2: # BB#0: 3019 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25] 3020 ; AVX2-NEXT: retq 3021 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3022 ret <16 x i16> %shuffle 3023 } 3024 3025 define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3026 ; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3027 ; AVX1: # BB#0: 3028 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3029 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3030 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3031 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3032 ; AVX1-NEXT: retq 3033 ; 3034 ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3035 ; AVX2: # BB#0: 3036 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25] 3037 ; AVX2-NEXT: retq 3038 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 
undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3039 ret <16 x i16> %shuffle 3040 } 3041 3042 define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) { 3043 ; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3044 ; AVX1: # BB#0: 3045 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3046 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3047 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3048 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 3049 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 3050 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3051 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7] 3052 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3053 ; AVX1-NEXT: retq 3054 ; 3055 ; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3056 ; AVX2: # BB#0: 3057 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15] 3058 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3059 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3060 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3061 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3062 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3063 ; AVX2-NEXT: retq 3064 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10> 3065 ret <16 x i16> %shuffle 3066 } 3067 3068 define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 3069 ; AVX1-LABEL: 
shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 3070 ; AVX1: # BB#0: 3071 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3072 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3073 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3074 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 3075 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3076 ; AVX1-NEXT: retq 3077 ; 3078 ; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 3079 ; AVX2: # BB#0: 3080 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21] 3081 ; AVX2-NEXT: retq 3082 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef> 3083 ret <16 x i16> %shuffle 3084 } 3085 3086 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) { 3087 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 3088 ; AVX1: # BB#0: 3089 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3090 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3091 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3092 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3093 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3094 ; AVX1-NEXT: retq 3095 ; 3096 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 3097 ; AVX2: # BB#0: 3098 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3099 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3100 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3101 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = 
xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3102 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3103 ; AVX2-NEXT: retq 3104 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10> 3105 ret <16 x i16> %shuffle 3106 } 3107 3108 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 3109 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 3110 ; AVX1: # BB#0: 3111 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3112 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3113 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3114 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3115 ; AVX1-NEXT: retq 3116 ; 3117 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 3118 ; AVX2: # BB#0: 3119 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21] 3120 ; AVX2-NEXT: retq 3121 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef> 3122 ret <16 x i16> %shuffle 3123 } 3124 3125 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3126 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 3127 ; AVX1: # BB#0: 3128 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3129 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3130 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3131 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3132 ; AVX1-NEXT: retq 3133 ; 3134 ; AVX2-LABEL: 
shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 3135 ; AVX2: # BB#0: 3136 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero 3137 ; AVX2-NEXT: retq 3138 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef> 3139 ret <16 x i16> %shuffle 3140 } 3141 3142 define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) { 3143 ; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 3144 ; AVX1: # BB#0: 3145 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3146 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3147 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3148 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3149 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 3150 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7] 3152 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3153 ; AVX1-NEXT: retq 3154 ; 3155 ; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 3156 ; AVX2: # BB#0: 3157 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15] 3158 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3159 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3160 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3161 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3162 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3163 ; AVX2-NEXT: retq 3164 
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26> 3165 ret <16 x i16> %shuffle 3166 } 3167 3168 define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) { 3169 ; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 3170 ; AVX1: # BB#0: 3171 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3172 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3173 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3174 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 3175 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3176 ; AVX1-NEXT: retq 3177 ; 3178 ; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 3179 ; AVX2: # BB#0: 3180 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21] 3181 ; AVX2-NEXT: retq 3182 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef> 3183 ret <16 x i16> %shuffle 3184 } 3185 3186 define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) { 3187 ; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 3188 ; AVX1: # BB#0: 3189 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3190 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3191 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3192 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 3193 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 3194 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = 
xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3195 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7] 3196 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3197 ; AVX1-NEXT: retq 3198 ; 3199 ; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 3200 ; AVX2: # BB#0: 3201 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15] 3202 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3203 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3204 ; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3205 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3206 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3207 ; AVX2-NEXT: retq 3208 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28> 3209 ret <16 x i16> %shuffle 3210 } 3211 3212 define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) { 3213 ; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 3214 ; AVX1: # BB#0: 3215 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3216 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3217 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3218 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 3219 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3220 ; AVX1-NEXT: retq 3221 ; 3222 ; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 3223 ; AVX2: # BB#0: 3224 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25] 3225 ; AVX2-NEXT: retq 3226 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 
undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef> 3227 ret <16 x i16> %shuffle 3228 } 3229 3230 define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) { 3231 ; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 3232 ; AVX1: # BB#0: 3233 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3234 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 3235 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3236 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4] 3237 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3238 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 3239 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 3240 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3241 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3242 ; AVX1-NEXT: retq 3243 ; 3244 ; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 3245 ; AVX2: # BB#0: 3246 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15] 3247 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31] 3248 ; AVX2-NEXT: retq 3249 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef> 3250 ret <16 x i16> %shuffle 3251 } 3252 3253 define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) { 3254 ; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 3255 ; AVX1: # BB#0: 3256 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7] 3257 ; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i16> %shuffle
}

; Splat of element 8 (the first element of the high 128-bit lane) across all
; 16 lanes.  Both lowerings first bring the high lane down with a 128-bit
; extract, then broadcast within the low lane and mirror back up.
define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

; Splat of element 9 into the low half only; the high half of the result is
; undef, so no insert back into the upper lane is required (byte pair 2,3 of
; the extracted high lane is element 9).
define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Inserting a loaded scalar into element 0 of an all-zero vector should fold
; to a single zero-extending scalar-to-vector move (vmovd implicitly zeroes
; the rest of the ymm register).
define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
; ALL:       # BB#0:
; ALL-NEXT:    movzwl (%rdi), %eax
; ALL-NEXT:    vmovd %eax, %xmm0
; ALL-NEXT:    retq
  %val = load i16, i16* %ptr
  %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
  ret <16 x i16> %i0
}

; Concatenation of the low half of %a with the high half of %b: the
; two-step shuffle through <8 x i16> halves should fold to one 256-bit blend.
define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
  %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuf
}

; Concatenation of the high halves of %a and %b, routed through <16 x i8>
; bitcasts: the whole sequence should still fold to a single vperm2f128
; selecting the upper lane of each source.
define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
; ALL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
  %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
  %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
  %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
  ret <16 x i16> %shuffle16
}

; Splat of the low i16 of an i32 loaded from memory; on AVX2 this should
; become a single vpbroadcastw directly from the memory operand.
define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_mem_v16i16_i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %tmp3
}

; Same splat as above, but the i16 is sign-extended to i32 before the insert,
; so the scalar extension (movswl) cannot be folded into the broadcast load.
define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    movswl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    movswl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %tmp4
}

; Splat of i16 element 1 of the loaded i32 (its high half, i.e. the two bytes
; at offset 2); AVX2 folds this to a broadcast load from 2(%rdi).
define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; AVX2-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %tmp3
}

; As above but the i32 is inserted at index 1 and i16 element 3 is splatted;
; element 3 is still the loaded value's high half, so the same code
; (broadcast of the i16 at offset 2) is expected.
define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; AVX2-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i16> %tmp3
}