; PowerPC Power9 (ISA 3.0) codegen tests: VSX/Altivec vector instructions.
      1 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
      2 ; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
      3 
      4 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
      5 ; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
      6 
      7 @uca = global <16 x i8> zeroinitializer, align 16
      8 @ucb = global <16 x i8> zeroinitializer, align 16
      9 @sca = global <16 x i8> zeroinitializer, align 16
     10 @scb = global <16 x i8> zeroinitializer, align 16
     11 @usa = global <8 x i16> zeroinitializer, align 16
     12 @usb = global <8 x i16> zeroinitializer, align 16
     13 @ssa = global <8 x i16> zeroinitializer, align 16
     14 @ssb = global <8 x i16> zeroinitializer, align 16
     15 @uia = global <4 x i32> zeroinitializer, align 16
     16 @uib = global <4 x i32> zeroinitializer, align 16
     17 @sia = global <4 x i32> zeroinitializer, align 16
     18 @sib = global <4 x i32> zeroinitializer, align 16
     19 @ulla = global <2 x i64> zeroinitializer, align 16
     20 @ullb = global <2 x i64> zeroinitializer, align 16
     21 @slla = global <2 x i64> zeroinitializer, align 16
     22 @sllb = global <2 x i64> zeroinitializer, align 16
     23 @uxa = global <1 x i128> zeroinitializer, align 16
     24 @uxb = global <1 x i128> zeroinitializer, align 16
     25 @sxa = global <1 x i128> zeroinitializer, align 16
     26 @sxb = global <1 x i128> zeroinitializer, align 16
     27 @vfa = global <4 x float> zeroinitializer, align 16
     28 @vfb = global <4 x float> zeroinitializer, align 16
     29 @vda = global <2 x double> zeroinitializer, align 16
     30 @vdb = global <2 x double> zeroinitializer, align 16
     31 
; End-to-end codegen check: for each vector element type, load two globals,
; add them, and pass the sum to the variadic @sink. On pwr9 the loads must
; select the ISA 3.0 X-form lxvx, the add must select the element-width
; specific vector add, and the outgoing-argument spill must use stxv.
define void @_Z4testv() {
entry:
; CHECK-LABEL: @_Z4testv
; unsigned char: <16 x i8> add -> vaddubm
  %0 = load <16 x i8>, <16 x i8>* @uca, align 16
  %1 = load <16 x i8>, <16 x i8>* @ucb, align 16
  %add.i = add <16 x i8> %1, %0
  tail call void (...) @sink(<16 x i8> %add.i)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; signed char: same vaddubm (add is sign-agnostic)
  %2 = load <16 x i8>, <16 x i8>* @sca, align 16
  %3 = load <16 x i8>, <16 x i8>* @scb, align 16
  %add.i22 = add <16 x i8> %3, %2
  tail call void (...) @sink(<16 x i8> %add.i22)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; unsigned short: <8 x i16> add -> vadduhm
  %4 = load <8 x i16>, <8 x i16>* @usa, align 16
  %5 = load <8 x i16>, <8 x i16>* @usb, align 16
  %add.i21 = add <8 x i16> %5, %4
  tail call void (...) @sink(<8 x i16> %add.i21)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; signed short: same vadduhm
  %6 = load <8 x i16>, <8 x i16>* @ssa, align 16
  %7 = load <8 x i16>, <8 x i16>* @ssb, align 16
  %add.i20 = add <8 x i16> %7, %6
  tail call void (...) @sink(<8 x i16> %add.i20)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; unsigned int: <4 x i32> add -> vadduwm
  %8 = load <4 x i32>, <4 x i32>* @uia, align 16
  %9 = load <4 x i32>, <4 x i32>* @uib, align 16
  %add.i19 = add <4 x i32> %9, %8
  tail call void (...) @sink(<4 x i32> %add.i19)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; signed int: same vadduwm
  %10 = load <4 x i32>, <4 x i32>* @sia, align 16
  %11 = load <4 x i32>, <4 x i32>* @sib, align 16
  %add.i18 = add <4 x i32> %11, %10
  tail call void (...) @sink(<4 x i32> %add.i18)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; unsigned long long: <2 x i64> add -> vaddudm
  %12 = load <2 x i64>, <2 x i64>* @ulla, align 16
  %13 = load <2 x i64>, <2 x i64>* @ullb, align 16
  %add.i17 = add <2 x i64> %13, %12
  tail call void (...) @sink(<2 x i64> %add.i17)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; signed long long: same vaddudm
  %14 = load <2 x i64>, <2 x i64>* @slla, align 16
  %15 = load <2 x i64>, <2 x i64>* @sllb, align 16
  %add.i16 = add <2 x i64> %15, %14
  tail call void (...) @sink(<2 x i64> %add.i16)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; unsigned __int128: <1 x i128> add -> vadduqm
  %16 = load <1 x i128>, <1 x i128>* @uxa, align 16
  %17 = load <1 x i128>, <1 x i128>* @uxb, align 16
  %add.i15 = add <1 x i128> %17, %16
  tail call void (...) @sink(<1 x i128> %add.i15)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; signed __int128: same vadduqm
  %18 = load <1 x i128>, <1 x i128>* @sxa, align 16
  %19 = load <1 x i128>, <1 x i128>* @sxb, align 16
  %add.i14 = add <1 x i128> %19, %18
  tail call void (...) @sink(<1 x i128> %add.i14)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
; float: fadd -> xvaddsp (VSX registers 0/1 rather than Altivec 34/35)
  %20 = load <4 x float>, <4 x float>* @vfa, align 16
  %21 = load <4 x float>, <4 x float>* @vfb, align 16
  %add.i13 = fadd <4 x float> %20, %21
  tail call void (...) @sink(<4 x float> %add.i13)
; CHECK: lxvx 0, 0, 3
; CHECK: lxvx 1, 0, 4
; CHECK: xvaddsp 34, 0, 1
; CHECK: stxv 34,
; CHECK: bl sink
; double: fadd -> xvadddp
  %22 = load <2 x double>, <2 x double>* @vda, align 16
  %23 = load <2 x double>, <2 x double>* @vdb, align 16
  %add.i12 = fadd <2 x double> %22, %23
  tail call void (...) @sink(<2 x double> %add.i12)
; CHECK: lxvx 0, 0, 3
; CHECK: lxvx 1, 0, 4
; CHECK: xvadddp 0, 0, 1
; CHECK: stxv 0,
; CHECK: bl sink
  ret void
}
    145 
    146 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.xviexpsp lowers to a single xviexpsp
; (Vector Insert Exponent Single-Precision) instruction.
define <4 x float> @testXVIEXPSP(<4 x i32> %a, <4 x i32> %b) {
entry:
  %0 = tail call <4 x float> @llvm.ppc.vsx.xviexpsp(<4 x i32> %a, <4 x i32> %b)
  ret <4 x float> %0
; CHECK-LABEL: testXVIEXPSP
; CHECK: xviexpsp 34, 34, 35
; CHECK: blr
}
    155 ; Function Attrs: nounwind readnone
    156 declare <4 x float> @llvm.ppc.vsx.xviexpsp(<4 x i32>, <4 x i32>)
    157 
    158 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.xviexpdp lowers to a single xviexpdp
; (Vector Insert Exponent Double-Precision) instruction.
define <2 x double> @testXVIEXPDP(<2 x i64> %a, <2 x i64> %b) {
entry:
  %0 = tail call <2 x double> @llvm.ppc.vsx.xviexpdp(<2 x i64> %a, <2 x i64> %b)
  ret <2 x double> %0
; CHECK-LABEL: testXVIEXPDP
; CHECK: xviexpdp 34, 34, 35
; CHECK: blr
}
    167 ; Function Attrs: nounwind readnone
    168 declare <2 x double> @llvm.ppc.vsx.xviexpdp(<2 x i64>, <2 x i64>)
    169 
; Checks that @llvm.ppc.altivec.vslv lowers to a single vslv
; (Vector Shift Left Variable) instruction.
define <16 x i8> @testVSLV(<16 x i8> %a, <16 x i8> %b) {
entry:
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vslv(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
; CHECK-LABEL: testVSLV
; CHECK: vslv 2, 2, 3
; CHECK: blr
}
    178 ; Function Attrs: nounwind readnone
    179 declare <16 x i8> @llvm.ppc.altivec.vslv(<16 x i8>, <16 x i8>)
    180 
    181 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.altivec.vsrv lowers to a single vsrv
; (Vector Shift Right Variable) instruction.
define <16 x i8> @testVSRV(<16 x i8> %a, <16 x i8> %b) {
entry:
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vsrv(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
; CHECK-LABEL: testVSRV
; CHECK: vsrv 2, 2, 3
; CHECK: blr
}
    190 ; Function Attrs: nounwind readnone
    191 declare <16 x i8> @llvm.ppc.altivec.vsrv(<16 x i8>, <16 x i8>)
    192 
    193 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.xvcvsphp (single- to half-precision convert)
; lowers to xvcvsphp; the result is bitcast to <8 x i16>, which must not
; introduce any extra instructions.
define <8 x i16> @testXVCVSPHP(<4 x float> %a) {
entry:
; CHECK-LABEL: testXVCVSPHP
; CHECK: xvcvsphp 34, 34
; CHECK: blr
  %0 = tail call <4 x float> @llvm.ppc.vsx.xvcvsphp(<4 x float> %a)
  %1 = bitcast <4 x float> %0 to <8 x i16>
  ret <8 x i16> %1
}
    203 
    204 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.altivec.vrlwmi lowers to vrlwmi. Note the intrinsic
; is called with (%a, %c, %b) — the mask-insert destination is the second
; operand — so the expected instruction reads registers 2 and 4 into 3.
define <4 x i32> @testVRLWMI(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
entry:
; CHECK-LABEL: testVRLWMI
; CHECK: vrlwmi 3, 2, 4
; CHECK: blr
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vrlwmi(<4 x i32> %a, <4 x i32> %c, <4 x i32> %b)
  ret <4 x i32> %0
}
    213 
    214 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.altivec.vrldmi lowers to vrldmi. As in testVRLWMI,
; the call passes (%a, %c, %b), so the instruction writes register 3 from
; sources 2 and 4.
define <2 x i64> @testVRLDMI(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
entry:
; CHECK-LABEL: testVRLDMI
; CHECK: vrldmi 3, 2, 4
; CHECK: blr
  %0 = tail call <2 x i64> @llvm.ppc.altivec.vrldmi(<2 x i64> %a, <2 x i64> %c, <2 x i64> %b)
  ret <2 x i64> %0
}
    223 
    224 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.altivec.vrlwnm followed by an explicit IR 'and'
; lowers to vrlwnm plus xxland (the mask is applied as a separate op here).
define <4 x i32> @testVRLWNM(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vrlwnm(<4 x i32> %a, <4 x i32> %b)
  %and.i = and <4 x i32> %0, %c
  ret <4 x i32> %and.i
; CHECK-LABEL: testVRLWNM
; CHECK: vrlwnm 2, 2, 3
; CHECK: xxland 34, 34, 36
; CHECK: blr
}
    235 
    236 ; Function Attrs: nounwind readnone
; Doubleword variant of testVRLWNM: vrldnm plus xxland for the trailing
; IR 'and'.
define <2 x i64> @testVRLDNM(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.altivec.vrldnm(<2 x i64> %a, <2 x i64> %b)
  %and.i = and <2 x i64> %0, %c
  ret <2 x i64> %and.i
; CHECK-LABEL: testVRLDNM
; CHECK: vrldnm 2, 2, 3
; CHECK: xxland 34, 34, 36
; CHECK: blr
}
    247 
    248 ; Function Attrs: nounwind readnone
    249 declare <4 x float> @llvm.ppc.vsx.xvcvsphp(<4 x float>)
    250 
    251 ; Function Attrs: nounwind readnone
    252 declare <4 x i32> @llvm.ppc.altivec.vrlwmi(<4 x i32>, <4 x i32>, <4 x i32>)
    253 
    254 ; Function Attrs: nounwind readnone
    255 declare <2 x i64> @llvm.ppc.altivec.vrldmi(<2 x i64>, <2 x i64>, <2 x i64>)
    256 
    257 ; Function Attrs: nounwind readnone
    258 declare <4 x i32> @llvm.ppc.altivec.vrlwnm(<4 x i32>, <4 x i32>)
    259 
    260 ; Function Attrs: nounwind readnone
    261 declare <2 x i64> @llvm.ppc.altivec.vrldnm(<2 x i64>, <2 x i64>)
    262 
; Checks that @llvm.ppc.vsx.xvxexpsp lowers to a single xvxexpsp
; (Vector Extract Exponent Single-Precision) instruction.
define <4 x i32> @testXVXEXPSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvxexpsp(<4 x float> %a)
  ret <4 x i32> %0
; CHECK-LABEL: testXVXEXPSP
; CHECK: xvxexpsp 34, 34
; CHECK: blr
}
    271 ; Function Attrs: nounwind readnone
    272 declare <4 x i32> @llvm.ppc.vsx.xvxexpsp(<4 x float>)
    273 
    274 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.xvxexpdp lowers to a single xvxexpdp
; (Vector Extract Exponent Double-Precision) instruction.
define <2 x i64> @testXVXEXPDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvxexpdp(<2 x double> %a)
  ret <2 x i64> %0
; CHECK-LABEL: testXVXEXPDP
; CHECK: xvxexpdp 34, 34
; CHECK: blr
}
    283 ; Function Attrs: nounwind readnone
    284 declare <2 x i64>@llvm.ppc.vsx.xvxexpdp(<2 x double>)
    285 
    286 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.xvxsigsp lowers to a single xvxsigsp
; (Vector Extract Significand Single-Precision) instruction.
define <4 x i32> @testXVXSIGSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvxsigsp(<4 x float> %a)
  ret <4 x i32> %0
; CHECK-LABEL: testXVXSIGSP
; CHECK: xvxsigsp 34, 34
; CHECK: blr
}
    295 ; Function Attrs: nounwind readnone
    296 declare <4 x i32> @llvm.ppc.vsx.xvxsigsp(<4 x float>)
    297 
    298 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.xvxsigdp lowers to a single xvxsigdp
; (Vector Extract Significand Double-Precision) instruction.
define <2 x i64> @testXVXSIGDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvxsigdp(<2 x double> %a)
  ret <2 x i64> %0
; CHECK-LABEL: testXVXSIGDP
; CHECK: xvxsigdp 34, 34
; CHECK: blr
}
    307 ; Function Attrs: nounwind readnone
    308 declare <2 x i64> @llvm.ppc.vsx.xvxsigdp(<2 x double>)
    309 
    310 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.xvtstdcsp with the immediate data-class mask 127
; lowers to xvtstdcsp with that immediate encoded directly.
define <4 x i32> @testXVTSTDCSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvtstdcsp(<4 x float> %a, i32 127)
  ret <4 x i32> %0
; CHECK-LABEL: testXVTSTDCSP
; CHECK: xvtstdcsp 34, 34, 127
; CHECK: blr
}
    319 ; Function Attrs: nounwind readnone
    320 declare <4 x i32> @llvm.ppc.vsx.xvtstdcsp(<4 x float> %a, i32 %b)
    321 
    322 ; Function Attrs: nounwind readnone
; Double-precision variant of testXVTSTDCSP: immediate mask 127 must be
; encoded into a single xvtstdcdp.
define <2 x i64> @testXVTSTDCDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvtstdcdp(<2 x double> %a, i32 127)
  ret <2 x i64> %0
; CHECK-LABEL: testXVTSTDCDP
; CHECK: xvtstdcdp 34, 34, 127
; CHECK: blr
}
    331 ; Function Attrs: nounwind readnone
    332 declare <2 x i64> @llvm.ppc.vsx.xvtstdcdp(<2 x double> %a, i32 %b)
    333 
; Checks that @llvm.ppc.vsx.xvcvhpsp (half- to single-precision convert)
; lowers to a single xvcvhpsp instruction.
define <4 x float> @testXVCVHPSP(<8 x i16> %a) {
entry:
  %0 = tail call <4 x float>@llvm.ppc.vsx.xvcvhpsp(<8 x i16> %a)
  ret <4 x float> %0
; CHECK-LABEL: testXVCVHPSP
; CHECK: xvcvhpsp 34, 34
; CHECK: blr
}
    342 ; Function Attrs: nounwind readnone
    343 declare <4 x float>@llvm.ppc.vsx.xvcvhpsp(<8 x i16>)
    344 
    345 ; Function Attrs: nounwind readnone
; Checks that @llvm.ppc.vsx.lxvl (load vector with length %b from pointer
; %a) lowers to a single lxvl instruction.
define <4 x i32> @testLXVL(i8* %a, i64 %b) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.lxvl(i8* %a, i64 %b)
  ret <4 x i32> %0
; CHECK-LABEL: testLXVL
; CHECK: lxvl 34, 3, 4
; CHECK: blr
}
    354 ; Function Attrs: nounwind readnone
    355 declare <4 x i32> @llvm.ppc.vsx.lxvl(i8*, i64)
    356 
; Checks that @llvm.ppc.vsx.stxvl (store vector %a with length %c to
; pointer %b) lowers to a single stxvl instruction.
define void @testSTXVL(<4 x i32> %a, i8* %b, i64 %c) {
entry:
  tail call void @llvm.ppc.vsx.stxvl(<4 x i32> %a, i8* %b, i64 %c)
  ret void
; CHECK-LABEL: testSTXVL
; CHECK: stxvl 34, 5, 6
; CHECK: blr
}
    365 ; Function Attrs: nounwind readnone
    366 declare void @llvm.ppc.vsx.stxvl(<4 x i32>, i8*, i64)
    367 
    368 ; Function Attrs: nounwind readnone
; Left-justified variant of testLXVL: @llvm.ppc.vsx.lxvll must lower to a
; single lxvll instruction.
define <4 x i32> @testLXVLL(i8* %a, i64 %b) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.lxvll(i8* %a, i64 %b)
  ret <4 x i32> %0
; CHECK-LABEL: testLXVLL
; CHECK: lxvll 34, 3, 4
; CHECK: blr
}
    377 ; Function Attrs: nounwind readnone
    378 declare <4 x i32> @llvm.ppc.vsx.lxvll(i8*, i64)
    379 
; Left-justified variant of testSTXVL: @llvm.ppc.vsx.stxvll must lower to a
; single stxvll instruction.
define void @testSTXVLL(<4 x i32> %a, i8* %b, i64 %c) {
entry:
  tail call void @llvm.ppc.vsx.stxvll(<4 x i32> %a, i8* %b, i64 %c)
  ret void
; CHECK-LABEL: testSTXVLL
; CHECK: stxvll 34, 5, 6
; CHECK: blr
}
    388 ; Function Attrs: nounwind readnone
    389 declare void @llvm.ppc.vsx.stxvll(<4 x i32>, i8*, i64)
    390 
; Checks that (sub 0, %a) on <4 x i32> is selected as the single P9 word
; negate instruction vnegw instead of a zero-materialize plus subtract.
define <4 x i32> @test0(<4 x i32> %a) local_unnamed_addr #0 {
entry:
  %sub.i = sub <4 x i32> zeroinitializer, %a
  ret <4 x i32> %sub.i

; CHECK-LABEL: @test0
; CHECK: vnegw 2, 2
; CHECK: blr

}
    401 
; Doubleword variant of test0: (sub 0, %a) on <2 x i64> must select vnegd.
define <2 x i64> @test1(<2 x i64> %a) local_unnamed_addr #0 {
entry:
  %sub.i = sub <2 x i64> zeroinitializer, %a
  ret <2 x i64> %sub.i

; CHECK-LABEL: @test1
; CHECK: vnegd 2, 2
; CHECK: blr

}
    412 
    413 declare void @sink(...)
    414 
    415 ; stack object should be accessed using D-form load/store instead of X-form
; A 16-byte memset of a stack array should be lowered to a vector store
; using the D-form stxv (immediate offset from r1), never the X-form stxvx.
define signext i32 @func1() {
; CHECK-LABEL: @func1
; CHECK-NOT: stxvx
; CHECK: stxv {{[0-9]+}}, {{[0-9]+}}(1)
; CHECK-NOT: stxvx
; CHECK: blr
entry:
  ; 16-byte zeroed stack object; address escapes via @callee so the
  ; memset cannot be eliminated.
  %a = alloca [4 x i32], align 4
  %0 = bitcast [4 x i32]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 16, i1 false)
  %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 0
  %call = call signext i32 @callee(i32* nonnull %arraydecay) #3
  ret i32 %call
}
    430 
    431 ; stack object should be accessed using D-form load/store instead of X-form
; Same as func1 but with a 64-byte memset: all four 16-byte stores of the
; shared zero register must use D-form stxv, never X-form stxvx.
define signext i32 @func2() {
; CHECK-LABEL: @func2
; CHECK-NOT: stxvx
; CHECK: stxv [[ZEROREG:[0-9]+]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK-NOT: stxvx
; CHECK: blr
entry:
  ; 64-byte zeroed stack object; address escapes via @callee so the
  ; memset cannot be eliminated.
  %a = alloca [16 x i32], align 4
  %0 = bitcast [16 x i32]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 64, i1 false)
  %arraydecay = getelementptr inbounds [16 x i32], [16 x i32]* %a, i64 0, i64 0
  %call = call signext i32 @callee(i32* nonnull %arraydecay) #3
  ret i32 %call
}
    449 
    450 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
    451 declare signext i32 @callee(i32*) local_unnamed_addr #2
    452