; check AVX2 instructions that are disabled in case avx512VL/avx512BW present

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null

; 256/128-bit logic ops. The leading add pins the integer execution domain.

define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}

define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}

; 256-bit integer add/sub/mul (vpaddq/d/w/b, vpsubq/d/w/b, vpmullw).

define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; 256-bit integer compares (icmp + sext selects vpcmpgt*/vpcmpeq*).

define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

; 128-bit integer add/sub/mul.

define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}

; 128-bit integer compares.

define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

; Shuffles that lower to vpalignr.

define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}

define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}

define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}

; Scalar extract/insert from/into vectors (vpextr*/vpinsr*).

define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  ; NOTE(review): index 3 is out of range for <2 x i64>, so per LangRef %r2 is
  ; poison — confirm this is the intended input for the codegen path under test.
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
  ret <2 x i64> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}

define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}

define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
; vmovshdup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
; vmovshdup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
; vmovsldup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; vmovsldup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}

define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define void @store_floats(<4 x float> %x, i64* %p) {
  %a = fadd <4 x float> %x, %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = bitcast <2 x float> %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 0
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_h_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 1
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define <2 x double> @test39(double* %ptr) nounwind {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

; Vector shifts: variable and by-immediate forms.

define <8 x i32> @ashr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = ashr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = lshr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @ashr_const_v8i32(<8 x i32> %a) {
  %shift = ashr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_const_v8i32(<8 x i32> %a) {
  %shift = lshr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @shl_const_v8i32(<8 x i32> %a) {
  %shift = shl <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <4 x i64> @ashr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = ashr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = lshr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @shl_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @ashr_const_v4i64(<4 x i64> %a) {
  %shift = ashr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_const_v4i64(<4 x i64> %a) {
  %shift = lshr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @shl_const_v4i64(<4 x i64> %a) {
  %shift = shl <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <16 x i16> @ashr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = ashr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = lshr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @shl_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @ashr_const_v16i16(<16 x i16> %a) {
  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_const_v16i16(<16 x i16> %a) {
  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <4 x i32> @ashr_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %shift = ashr <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {
  %shift = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shift
}

define <2 x i64> @ashr_v2i64(<2 x i64> %a, <2 x i64> %b) {
  %shift = ashr <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <2 x i64> @shl_const_v2i64(<2 x i64> %a) {
  %shift = shl <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %shift
}

define <8 x i16> @ashr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = ashr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = lshr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @shl_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = shl <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @ashr_const_v8i16(<8 x i16> %a) {
  %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_const_v8i16(<8 x i16> %a) {
  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {
  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

; Zero-extension of the low 8 bytes (vpmovzxbw pattern).

define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}

; Splat-from-scalar patterns (vpbroadcast*/vbroadcastss/vbroadcastsd).

define <32 x i8> @_broadcast32xi8(i8 %a) {
  %b = insertelement <32 x i8> undef, i8 %a, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %c
}

define <16 x i8> @_broadcast16xi8(i8 %a) {
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %c
}

define <16 x i16> @_broadcast16xi16(i16 %a) {
  %b = insertelement <16 x i16> undef, i16 %a, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %c
}

define <8 x i16> @_broadcast8xi16(i16 %a) {
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %c
}

define <8 x i32> @_broadcast8xi32(i32 %a) {
  %b = insertelement <8 x i32> undef, i32 %a, i32 0
  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %c
}

define <4 x i32> @_broadcast4xi32(i32 %a) {
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %c
}

define <4 x i64> @_broadcast4xi64(i64 %a) {
  %b = insertelement <4 x i64> undef, i64 %a, i64 0
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %c
}

define <2 x i64> @_broadcast2xi64(i64 %a) {
  %b = insertelement <2 x i64> undef, i64 %a, i64 0
  %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %c
}

define <8 x float> @_broadcast8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x float> @_broadcast4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <4 x double> @_broadcast4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}

define <2 x double> @_broadcast2xdouble(double %a) {
  %b = insertelement <2 x double> undef, double %a, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %c
}

; Unfused multiply-subtract pattern.

define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <32 x i8> @test_cmpgtb(<32 x i8> %A) {
; generate the follow code
; vpxor %ymm1, %ymm1, %ymm1
; vpcmpgtb %ymm0, %ymm1, %ymm0
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}

define <4 x float> @_inreg4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <8 x float> @_inreg8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x double> @_inreg4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}