; Test file for Power9 (pwr9) CodeGen: verifies that vector integer/FP adds
; select the VSX/Altivec forms (lxvx/stxv, vadd*m, xvadd*p), and that the new
; P9 intrinsics (xviexp*, xvxexp*, xvxsig*, xvtstdc*, xvcvsphp/xvcvhpsp,
; vslv/vsrv, vrl*mi/vrl*nm, lxvl(l)/stxvl(l), vnegw/vnegd) lower to their
; single-instruction encodings on both little- and big-endian targets.
; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s

; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s

@uca = global <16 x i8> zeroinitializer, align 16
@ucb = global <16 x i8> zeroinitializer, align 16
@sca = global <16 x i8> zeroinitializer, align 16
@scb = global <16 x i8> zeroinitializer, align 16
@usa = global <8 x i16> zeroinitializer, align 16
@usb = global <8 x i16> zeroinitializer, align 16
@ssa = global <8 x i16> zeroinitializer, align 16
@ssb = global <8 x i16> zeroinitializer, align 16
@uia = global <4 x i32> zeroinitializer, align 16
@uib = global <4 x i32> zeroinitializer, align 16
@sia = global <4 x i32> zeroinitializer, align 16
@sib = global <4 x i32> zeroinitializer, align 16
@ulla = global <2 x i64> zeroinitializer, align 16
@ullb = global <2 x i64> zeroinitializer, align 16
@slla = global <2 x i64> zeroinitializer, align 16
@sllb = global <2 x i64> zeroinitializer, align 16
@uxa = global <1 x i128> zeroinitializer, align 16
@uxb = global <1 x i128> zeroinitializer, align 16
@sxa = global <1 x i128> zeroinitializer, align 16
@sxb = global <1 x i128> zeroinitializer, align 16
@vfa = global <4 x float> zeroinitializer, align 16
@vfb = global <4 x float> zeroinitializer, align 16
@vda = global <2 x double> zeroinitializer, align 16
@vdb = global <2 x double> zeroinitializer, align 16

; One add per element type; each result is passed to @sink so the add and the
; surrounding load/store instruction selection can be checked independently.
define void @_Z4testv() {
entry:
; CHECK-LABEL: @_Z4testv
  %0 = load <16 x i8>, <16 x i8>* @uca, align 16
  %1 = load <16 x i8>, <16 x i8>* @ucb, align 16
  %add.i = add <16 x i8> %1, %0
  tail call void (...) @sink(<16 x i8> %add.i)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %2 = load <16 x i8>, <16 x i8>* @sca, align 16
  %3 = load <16 x i8>, <16 x i8>* @scb, align 16
  %add.i22 = add <16 x i8> %3, %2
  tail call void (...) @sink(<16 x i8> %add.i22)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %4 = load <8 x i16>, <8 x i16>* @usa, align 16
  %5 = load <8 x i16>, <8 x i16>* @usb, align 16
  %add.i21 = add <8 x i16> %5, %4
  tail call void (...) @sink(<8 x i16> %add.i21)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %6 = load <8 x i16>, <8 x i16>* @ssa, align 16
  %7 = load <8 x i16>, <8 x i16>* @ssb, align 16
  %add.i20 = add <8 x i16> %7, %6
  tail call void (...) @sink(<8 x i16> %add.i20)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %8 = load <4 x i32>, <4 x i32>* @uia, align 16
  %9 = load <4 x i32>, <4 x i32>* @uib, align 16
  %add.i19 = add <4 x i32> %9, %8
  tail call void (...) @sink(<4 x i32> %add.i19)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %10 = load <4 x i32>, <4 x i32>* @sia, align 16
  %11 = load <4 x i32>, <4 x i32>* @sib, align 16
  %add.i18 = add <4 x i32> %11, %10
  tail call void (...) @sink(<4 x i32> %add.i18)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %12 = load <2 x i64>, <2 x i64>* @ulla, align 16
  %13 = load <2 x i64>, <2 x i64>* @ullb, align 16
  %add.i17 = add <2 x i64> %13, %12
  tail call void (...) @sink(<2 x i64> %add.i17)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %14 = load <2 x i64>, <2 x i64>* @slla, align 16
  %15 = load <2 x i64>, <2 x i64>* @sllb, align 16
  %add.i16 = add <2 x i64> %15, %14
  tail call void (...) @sink(<2 x i64> %add.i16)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %16 = load <1 x i128>, <1 x i128>* @uxa, align 16
  %17 = load <1 x i128>, <1 x i128>* @uxb, align 16
  %add.i15 = add <1 x i128> %17, %16
  tail call void (...) @sink(<1 x i128> %add.i15)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %18 = load <1 x i128>, <1 x i128>* @sxa, align 16
  %19 = load <1 x i128>, <1 x i128>* @sxb, align 16
  %add.i14 = add <1 x i128> %19, %18
  tail call void (...) @sink(<1 x i128> %add.i14)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %20 = load <4 x float>, <4 x float>* @vfa, align 16
  %21 = load <4 x float>, <4 x float>* @vfb, align 16
  %add.i13 = fadd <4 x float> %20, %21
  tail call void (...) @sink(<4 x float> %add.i13)
; CHECK: lxvx 0, 0, 3
; CHECK: lxvx 1, 0, 4
; CHECK: xvaddsp 34, 0, 1
; CHECK: stxv 34,
; CHECK: bl sink
  %22 = load <2 x double>, <2 x double>* @vda, align 16
  %23 = load <2 x double>, <2 x double>* @vdb, align 16
  %add.i12 = fadd <2 x double> %22, %23
  tail call void (...) @sink(<2 x double> %add.i12)
; CHECK: lxvx 0, 0, 3
; CHECK: lxvx 1, 0, 4
; CHECK: xvadddp 0, 0, 1
; CHECK: stxv 0,
; CHECK: bl sink
  ret void
}

; Function Attrs: nounwind readnone
define <4 x float> @testXVIEXPSP(<4 x i32> %a, <4 x i32> %b) {
entry:
  %0 = tail call <4 x float> @llvm.ppc.vsx.xviexpsp(<4 x i32> %a, <4 x i32> %b)
  ret <4 x float> %0
; CHECK-LABEL: testXVIEXPSP
; CHECK: xviexpsp 34, 34, 35
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.ppc.vsx.xviexpsp(<4 x i32>, <4 x i32>)

; Function Attrs: nounwind readnone
define <2 x double> @testXVIEXPDP(<2 x i64> %a, <2 x i64> %b) {
entry:
  %0 = tail call <2 x double> @llvm.ppc.vsx.xviexpdp(<2 x i64> %a, <2 x i64> %b)
  ret <2 x double> %0
; CHECK-LABEL: testXVIEXPDP
; CHECK: xviexpdp 34, 34, 35
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <2 x double> @llvm.ppc.vsx.xviexpdp(<2 x i64>, <2 x i64>)

define <16 x i8> @testVSLV(<16 x i8> %a, <16 x i8> %b) {
entry:
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vslv(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
; CHECK-LABEL: testVSLV
; CHECK: vslv 2, 2, 3
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.vslv(<16 x i8>, <16 x i8>)

; Function Attrs: nounwind readnone
define <16 x i8> @testVSRV(<16 x i8> %a, <16 x i8> %b) {
entry:
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vsrv(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
; CHECK-LABEL: testVSRV
; CHECK: vsrv 2, 2, 3
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.vsrv(<16 x i8>, <16 x i8>)

; Function Attrs: nounwind readnone
define <8 x i16> @testXVCVSPHP(<4 x float> %a) {
entry:
; CHECK-LABEL: testXVCVSPHP
; CHECK: xvcvsphp 34, 34
; CHECK: blr
  %0 = tail call <4 x float> @llvm.ppc.vsx.xvcvsphp(<4 x float> %a)
  %1 = bitcast <4 x float> %0 to <8 x i16>
  ret <8 x i16> %1
}

; Note: the vrlwmi/vrldmi intrinsics take (rotate-src, insert-src, mask), so
; %c is passed as the second operand and %b as the mask in these two tests.
; Function Attrs: nounwind readnone
define <4 x i32> @testVRLWMI(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
entry:
; CHECK-LABEL: testVRLWMI
; CHECK: vrlwmi 3, 2, 4
; CHECK: blr
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vrlwmi(<4 x i32> %a, <4 x i32> %c, <4 x i32> %b)
  ret <4 x i32> %0
}

; Function Attrs: nounwind readnone
define <2 x i64> @testVRLDMI(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
entry:
; CHECK-LABEL: testVRLDMI
; CHECK: vrldmi 3, 2, 4
; CHECK: blr
  %0 = tail call <2 x i64> @llvm.ppc.altivec.vrldmi(<2 x i64> %a, <2 x i64> %c, <2 x i64> %b)
  ret <2 x i64> %0
}

; Function Attrs: nounwind readnone
define <4 x i32> @testVRLWNM(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vrlwnm(<4 x i32> %a, <4 x i32> %b)
  %and.i = and <4 x i32> %0, %c
  ret <4 x i32> %and.i
; CHECK-LABEL: testVRLWNM
; CHECK: vrlwnm 2, 2, 3
; CHECK: xxland 34, 34, 36
; CHECK: blr
}

; Function Attrs: nounwind readnone
define <2 x i64> @testVRLDNM(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.altivec.vrldnm(<2 x i64> %a, <2 x i64> %b)
  %and.i = and <2 x i64> %0, %c
  ret <2 x i64> %and.i
; CHECK-LABEL: testVRLDNM
; CHECK: vrldnm 2, 2, 3
; CHECK: xxland 34, 34, 36
; CHECK: blr
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.ppc.vsx.xvcvsphp(<4 x float>)

; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.vrlwmi(<4 x i32>, <4 x i32>, <4 x i32>)

; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vrldmi(<2 x i64>, <2 x i64>, <2 x i64>)

; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.vrlwnm(<4 x i32>, <4 x i32>)

; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.vrldnm(<2 x i64>, <2 x i64>)

define <4 x i32> @testXVXEXPSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvxexpsp(<4 x float> %a)
  ret <4 x i32> %0
; CHECK-LABEL: testXVXEXPSP
; CHECK: xvxexpsp 34, 34
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.vsx.xvxexpsp(<4 x float>)

; Function Attrs: nounwind readnone
define <2 x i64> @testXVXEXPDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvxexpdp(<2 x double> %a)
  ret <2 x i64> %0
; CHECK-LABEL: testXVXEXPDP
; CHECK: xvxexpdp 34, 34
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <2 x i64>@llvm.ppc.vsx.xvxexpdp(<2 x double>)

; Function Attrs: nounwind readnone
define <4 x i32> @testXVXSIGSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvxsigsp(<4 x float> %a)
  ret <4 x i32> %0
; CHECK-LABEL: testXVXSIGSP
; CHECK: xvxsigsp 34, 34
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.vsx.xvxsigsp(<4 x float>)

; Function Attrs: nounwind readnone
define <2 x i64> @testXVXSIGDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvxsigdp(<2 x double> %a)
  ret <2 x i64> %0
; CHECK-LABEL: testXVXSIGDP
; CHECK: xvxsigdp 34, 34
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.vsx.xvxsigdp(<2 x double>)

; Function Attrs: nounwind readnone
define <4 x i32> @testXVTSTDCSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvtstdcsp(<4 x float> %a, i32 127)
  ret <4 x i32> %0
; CHECK-LABEL: testXVTSTDCSP
; CHECK: xvtstdcsp 34, 34, 127
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.vsx.xvtstdcsp(<4 x float> %a, i32 %b)

; Function Attrs: nounwind readnone
define <2 x i64> @testXVTSTDCDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvtstdcdp(<2 x double> %a, i32 127)
  ret <2 x i64> %0
; CHECK-LABEL: testXVTSTDCDP
; CHECK: xvtstdcdp 34, 34, 127
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.vsx.xvtstdcdp(<2 x double> %a, i32 %b)

define <4 x float> @testXVCVHPSP(<8 x i16> %a) {
entry:
  %0 = tail call <4 x float>@llvm.ppc.vsx.xvcvhpsp(<8 x i16> %a)
  ret <4 x float> %0
; CHECK-LABEL: testXVCVHPSP
; CHECK: xvcvhpsp 34, 34
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <4 x float>@llvm.ppc.vsx.xvcvhpsp(<8 x i16>)

; Function Attrs: nounwind readnone
define <4 x i32> @testLXVL(i8* %a, i64 %b) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.lxvl(i8* %a, i64 %b)
  ret <4 x i32> %0
; CHECK-LABEL: testLXVL
; CHECK: lxvl 34, 3, 4
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.vsx.lxvl(i8*, i64)

define void @testSTXVL(<4 x i32> %a, i8* %b, i64 %c) {
entry:
  tail call void @llvm.ppc.vsx.stxvl(<4 x i32> %a, i8* %b, i64 %c)
  ret void
; CHECK-LABEL: testSTXVL
; CHECK: stxvl 34, 5, 6
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare void @llvm.ppc.vsx.stxvl(<4 x i32>, i8*, i64)

; Function Attrs: nounwind readnone
define <4 x i32> @testLXVLL(i8* %a, i64 %b) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.lxvll(i8* %a, i64 %b)
  ret <4 x i32> %0
; CHECK-LABEL: testLXVLL
; CHECK: lxvll 34, 3, 4
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.vsx.lxvll(i8*, i64)

define void @testSTXVLL(<4 x i32> %a, i8* %b, i64 %c) {
entry:
  tail call void @llvm.ppc.vsx.stxvll(<4 x i32> %a, i8* %b, i64 %c)
  ret void
; CHECK-LABEL: testSTXVLL
; CHECK: stxvll 34, 5, 6
; CHECK: blr
}
; Function Attrs: nounwind readnone
declare void @llvm.ppc.vsx.stxvll(<4 x i32>, i8*, i64)

; Negation of a vector should select the P9 vneg[wd] instructions.
define <4 x i32> @test0(<4 x i32> %a) local_unnamed_addr #0 {
entry:
  %sub.i = sub <4 x i32> zeroinitializer, %a
  ret <4 x i32> %sub.i

; CHECK-LABEL: @test0
; CHECK: vnegw 2, 2
; CHECK: blr

}

define <2 x i64> @test1(<2 x i64> %a) local_unnamed_addr #0 {
entry:
  %sub.i = sub <2 x i64> zeroinitializer, %a
  ret <2 x i64> %sub.i

; CHECK-LABEL: @test1
; CHECK: vnegd 2, 2
; CHECK: blr

}

declare void @sink(...)

; stack object should be accessed using D-form load/store instead of X-form
define signext i32 @func1() {
; CHECK-LABEL: @func1
; CHECK-NOT: stxvx
; CHECK: stxv {{[0-9]+}}, {{[0-9]+}}(1)
; CHECK-NOT: stxvx
; CHECK: blr
entry:
  %a = alloca [4 x i32], align 4
  %0 = bitcast [4 x i32]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 16, i1 false)
  %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 0
  %call = call signext i32 @callee(i32* nonnull %arraydecay) #3
  ret i32 %call
}

; stack object should be accessed using D-form load/store instead of X-form
define signext i32 @func2() {
; CHECK-LABEL: @func2
; CHECK-NOT: stxvx
; CHECK: stxv [[ZEROREG:[0-9]+]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK-NOT: stxvx
; CHECK: blr
entry:
  %a = alloca [16 x i32], align 4
  %0 = bitcast [16 x i32]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 64, i1 false)
  %arraydecay = getelementptr inbounds [16 x i32], [16 x i32]* %a, i64 0, i64 0
  %call = call signext i32 @callee(i32* nonnull %arraydecay) #3
  ret i32 %call
}

declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
declare signext i32 @callee(i32*) local_unnamed_addr #2