; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s

; Codegen tests for shufflevector lowering when targeting -mcpu=knl.
; Each function below performs one shuffle (optionally followed by a masked
; select against zero) and the FileCheck directives above it name the
; instruction expected in the generated assembly.
;
; NOTE(review): the loads in test7mm/test8mm/test10/test11 use the legacy
; typed-pointer form without an explicit result type; confirm the LLVM version
; this file is run against before modernizing the syntax.

; test1: single-source <16 x float> shuffle with a non-constant-stride mask.
; The directives expect a constant-pool entry whose .long values mirror the
; shuffle mask (undef lanes appear as 0), followed by vpermps in the function.
; CHECK: LCP
; CHECK: .long 2
; CHECK: .long 5
; CHECK: .long 0
; CHECK: .long 0
; CHECK: .long 7
; CHECK: .long 0
; CHECK: .long 10
; CHECK: .long 1
; CHECK: .long 0
; CHECK: .long 5
; CHECK: .long 0
; CHECK: .long 4
; CHECK: .long 7
; CHECK: .long 0
; CHECK: .long 10
; CHECK: .long 1
; CHECK-LABEL: test1:
; CHECK: vpermps
; CHECK: ret
define <16 x float> @test1(<16 x float> %a) nounwind {
  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x float> %c
}

; test2: same mask as test1 on <16 x i32>; expects vpermd.
; CHECK-LABEL: test2:
; CHECK: vpermd
; CHECK: ret
define <16 x i32> @test2(<16 x i32> %a) nounwind {
  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
  ret <16 x i32> %c
}

; test3: single-source <8 x i64> shuffle; expects vpermq.
; CHECK-LABEL: test3:
; CHECK: vpermq
; CHECK: ret
define <8 x i64> @test3(<8 x i64> %a) nounwind {
  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
  ret <8 x i64> %c
}

; test4: single-source <8 x double> shuffle where only the first two result
; lanes are defined; expects vpermpd.
; CHECK-LABEL: test4:
; CHECK: vpermpd
; CHECK: ret
define <8 x double> @test4(<8 x double> %a) nounwind {
  %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x double> %c
}

; test5: two-source <8 x double> shuffle; expects vpermt2pd.
; CHECK-LABEL: test5:
; CHECK: vpermt2pd
; CHECK: ret
define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
  ret <8 x double> %c
}

; The reg variant of vpermt2 with a writemask
; test5m: the test5 shuffle selected against zeroinitializer under an i8 mask;
; expects vpermt2pd carrying a %k1-%k7 mask register and the {z} modifier.
; CHECK-LABEL: test5m:
; CHECK: vpermt2pd {{.* {%k[1-7]} {z}}}
define <8 x double> @test5m(<8 x double> %a, <8 x double> %b, i8 %mask) nounwind {
  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
  %m = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %m, <8 x double> %c, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; test6: single-source <8 x i64> shuffle that repeats the pattern 2,3,1,0 in
; each group of four lanes; expects the immediate form vpermq $30.
; CHECK-LABEL: test6:
; CHECK: vpermq $30
; CHECK: ret
define <8 x i64> @test6(<8 x i64> %a) nounwind {
  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
  ret <8 x i64> %c
}

; test7: two-source <8 x i64> shuffle; expects vpermt2q.
; CHECK-LABEL: test7:
; CHECK: vpermt2q
; CHECK: ret
define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
  ret <8 x i64> %c
}

; The reg variant of vpermt2 with a writemask
; test7m: the test7 shuffle selected against zeroinitializer under an i8 mask.
; CHECK-LABEL: test7m:
; CHECK: vpermt2q {{.* {%k[1-7]} {z}}}
define <8 x i64> @test7m(<8 x i64> %a, <8 x i64> %b, i8 %mask) nounwind {
  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
  %m = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

; The mem variant of vpermt2 with a writemask
; test7mm: as test7m, but the second shuffle operand is loaded from %pb; the
; pattern additionally requires a parenthesized (memory) operand.
; CHECK-LABEL: test7mm:
; CHECK: vpermt2q {{\(.*\).* {%k[1-7]} {z}}}
define <8 x i64> @test7mm(<8 x i64> %a, <8 x i64> *%pb, i8 %mask) nounwind {
  %b = load <8 x i64>* %pb
  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
  %m = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

; test8: two-source <16 x i32> shuffle; expects vpermt2d.
; CHECK-LABEL: test8:
; CHECK: vpermt2d
; CHECK: ret
define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %c
}

; The reg variant of vpermt2 with a writemask
; test8m: the test8 shuffle selected against zeroinitializer under an i16 mask.
; CHECK-LABEL: test8m:
; CHECK: vpermt2d {{.* {%k[1-7]} {z}}}
define <16 x i32> @test8m(<16 x i32> %a, <16 x i32> %b, i16 %mask) nounwind {
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  %m = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; The mem variant of vpermt2 with a writemask
; test8mm: as test8m, but the second shuffle operand is loaded from %pb.
; CHECK-LABEL: test8mm:
; CHECK: vpermt2d {{\(.*\).* {%k[1-7]} {z}}}
define <16 x i32> @test8mm(<16 x i32> %a, <16 x i32> *%pb, i16 %mask) nounwind {
  %b = load <16 x i32> * %pb
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  %m = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; test9: two-source <16 x float> shuffle; expects vpermt2ps.
; CHECK-LABEL: test9:
; CHECK: vpermt2ps
; CHECK: ret
define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %c
}

; The reg variant of vpermt2 with a writemask
; test9m: the test9 shuffle selected against zeroinitializer under an i16 mask.
; CHECK-LABEL: test9m:
; CHECK: vpermt2ps {{.*}} {%k{{.}}} {z}
define <16 x float> @test9m(<16 x float> %a, <16 x float> %b, i16 %mask) nounwind {
  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  %m = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %m, <16 x float> %c, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; test10: the test9 shuffle with the second operand loaded from memory; the
; pattern requires "(" right after the mnemonic, i.e. a memory first operand.
; CHECK-LABEL: test10:
; CHECK: vpermt2ps (
; CHECK: ret
define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
  %c = load <16 x float>* %b
  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x float> %d
}

; test11: the test8 shuffle with the second operand loaded from memory;
; expects vpermt2d.
; CHECK-LABEL: test11:
; CHECK: vpermt2d
; CHECK: ret
define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
  %c = load <16 x i32>* %b
  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
  ret <16 x i32> %d
}

; test12: <4 x i32> shuffle taking the low halves of both operands; expects
; vmovlhps whose printed encoding starts with byte 0x62.
; CHECK-LABEL: test12
; CHECK: vmovlhps {{.*}}## encoding: [0x62
; CHECK: ret
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) nounwind {
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %c
}

; test13: swaps adjacent <16 x float> lanes pairwise; expects vpermilps with
; immediate $-79 on a zmm register.
; CHECK-LABEL: test13
; CHECK: vpermilps $-79, %zmm
; CHECK: ret
define <16 x float> @test13(<16 x float> %a) {
  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  ret <16 x float> %b
}

; test14: single-source <8 x double> shuffle; expects vpermilpd with
; immediate $-53 on a zmm register.
; CHECK-LABEL: test14
; CHECK: vpermilpd $-53, %zmm
; CHECK: ret
define <8 x double> @test14(<8 x double> %a) {
  %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
  ret <8 x double> %b
}

; test15: the test13 mask applied to <16 x i32>; expects vpshufd with
; immediate $-79 on a zmm register.
; CHECK-LABEL: test15
; CHECK: vpshufd $-79, %zmm
; CHECK: ret
define <16 x i32> @test15(<16 x i32> %a) {
  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  ret <16 x i32> %b
}

; test16: two-source <8 x double> shuffle selecting the consecutive indices
; 2..9 of the concatenated operands; expects valignq $2.
; CHECK-LABEL: test16
; CHECK: valignq $2, %zmm0, %zmm1
; CHECK: ret
define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  ret <8 x double> %c
}

; test17: two-source <8 x double> shuffle with the last three lanes undef;
; expects vshufpd $19.
; CHECK-LABEL: test17
; CHECK: vshufpd $19, %zmm1, %zmm0
; CHECK: ret
define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
  ret <8 x double> %c
}

; test18: two-source <16 x i32> shuffle; expects vpunpckhdq on zmm registers.
; CHECK-LABEL: test18
; CHECK: vpunpckhdq %zmm
; CHECK: ret
define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
  %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15, i32 18, i32 26, i32 19, i32 27, i32 22, i32 30, i32 23, i32 31>
  ret <16 x i32> %b
}

; test19: two-source <16 x i32> shuffle; expects vpunpckldq on zmm registers.
; CHECK-LABEL: test19
; CHECK: vpunpckldq %zmm
; CHECK: ret
define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
  %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
  ret <16 x i32> %b
}

; test20: two-source <8 x i64> shuffle; expects vpunpckhqdq on zmm registers.
; CHECK-LABEL: test20
; CHECK: vpunpckhqdq %zmm
; CHECK: ret
define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
  %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 5, i32 3, i32 7, i32 9, i32 13, i32 11, i32 15>
  ret <8 x i64> %b
}

; test21: the test19 mask applied to <16 x float>; expects vunpcklps on zmm
; registers.
; CHECK-LABEL: test21
; CHECK: vunpcklps %zmm
; CHECK: ret
define <16 x float> @test21(<16 x float> %a, <16 x float> %c) {
  %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
  ret <16 x float> %b
}

; test22: <4 x i32> shuffle taking the high halves of both operands; expects
; vmovhlps whose printed encoding starts with byte 0x62.
; CHECK-LABEL: test22
; CHECK: vmovhlps {{.*}}## encoding: [0x62
; CHECK: ret
define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) nounwind {
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x i32> %c
}

; test23: two-source <16 x float> shuffle repeating one pattern in each group
; of four lanes; expects vshufps $-112 on zmm registers.
; CHECK-LABEL: @test23
; CHECK: vshufps $-112, %zmm
; CHECK: ret
define <16 x float> @test23(<16 x float> %a, <16 x float> %c) {
  %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30>
  ret <16 x float> %b
}

; test24: two-source <16 x i32> shuffle where only the first four result lanes
; are defined; expects vpermt2d.
; CHECK-LABEL: @test24
; CHECK: vpermt2d
; CHECK: ret
define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i32> %c
}

; test25: two-source <16 x i32> shuffle with several undef lanes; expects
; vshufps $52.
; CHECK-LABEL: @test25
; CHECK: vshufps $52
; CHECK: ret
define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind {
  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 undef, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef>
  ret <16 x i32> %c
}

; test26: single-source <16 x i32> shuffle duplicating odd-indexed lanes, with
; several undef lanes; expects vmovshdup.
; CHECK-LABEL: @test26
; CHECK: vmovshdup
; CHECK: ret
define <16 x i32> @test26(<16 x i32> %a) nounwind {
  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef>
  ret <16 x i32> %c
}

; test27: widening shuffle from <4 x i32> to <16 x i32> with only the first
; two result lanes defined. No specific instruction is required; the test only
; demands that the function is emitted and reaches ret.
; CHECK-LABEL: @test27
; CHECK: ret
define <16 x i32> @test27(<4 x i32>%a) {
  %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i32> %res
}

; test28: result is the low eight lanes of %x followed by the low eight lanes
; of %y; expects vinserti64x4 $1.
; CHECK-LABEL: @test28
; CHECK: vinserti64x4 $1
; CHECK: ret
define <16 x i32> @test28(<16 x i32>%x, <16 x i32>%y) {
  %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                                                               i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i32> %res
}

; test29: result is the low eight lanes of %y followed by the high eight lanes
; of %x; expects vinserti64x4 $0.
; CHECK-LABEL: @test29
; CHECK: vinserti64x4 $0
; CHECK: ret
define <16 x i32> @test29(<16 x i32>%x, <16 x i32>%y) {
  %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
                                                               i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %res
}