Home | History | Annotate | Download | only in PowerPC
      1 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
      2 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
      3 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not vabsdu
      4 
      5 ; Function Attrs: nounwind readnone
        ; Test input: the vmaxsw intrinsic applied to %a and (0 - %a) on <4 x i32>.
        ; The default-prefix directives (used by both pwr9 RUN lines) accept, in any
        ; order, a vxor whose two source operands are the same register and two
        ; xvnegsp instructions; vabsduw must be on the line right after them and
        ; blr on the line after that.
        ; The PWR8-prefixed directives (pwr8 RUN line) require xxlxor, vsubuwm,
        ; vmaxsw and blr, in that order.
      6 define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr {
      7 entry:
      8   %sub.i = sub <4 x i32> zeroinitializer, %a
      9   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)
     10   ret <4 x i32> %0
     11 ; CHECK-LABEL: simple_absv_32
     12 ; CHECK-DAG: vxor {{[0-9]+}}, [[REG:[0-9]+]], [[REG]]
     13 ; CHECK-DAG: xvnegsp 34, 34
     14 ; CHECK-DAG: xvnegsp 35, {{[0-9]+}}
     15 ; CHECK-NEXT: vabsduw 2, 2, {{[0-9]+}}
     16 ; CHECK-NEXT: blr
     17 ; CHECK-PWR8-LABEL: simple_absv_32
     18 ; CHECK-PWR8: xxlxor
     19 ; CHECK-PWR8: vsubuwm
     20 ; CHECK-PWR8: vmaxsw
     21 ; CHECK-PWR8: blr
     22 }
     23 
     24 ; Function Attrs: nounwind readnone
        ; Same input as simple_absv_32 except that the two intrinsic operands are
        ; swapped: vmaxsw is called with (0 - %a) first and %a second.
        ; The expected output is identical to simple_absv_32 for both the
        ; default-prefix directives (pwr9) and the PWR8-prefixed directives (pwr8).
     25 define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr {
     26 entry:
     27   %sub.i = sub <4 x i32> zeroinitializer, %a
     28   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)
     29   ret <4 x i32> %0
     30 ; CHECK-LABEL: simple_absv_32_swap
     31 ; CHECK-DAG: vxor {{[0-9]+}}, [[REG:[0-9]+]], [[REG]]
     32 ; CHECK-DAG: xvnegsp 34, 34
     33 ; CHECK-DAG: xvnegsp 35, {{[0-9]+}}
     34 ; CHECK-NEXT: vabsduw 2, 2, {{[0-9]+}}
     35 ; CHECK-NEXT: blr
     36 ; CHECK-PWR8-LABEL: simple_absv_32_swap
     37 ; CHECK-PWR8: xxlxor
     38 ; CHECK-PWR8: vsubuwm
     39 ; CHECK-PWR8: vmaxsw
     40 ; CHECK-PWR8: blr
     41 }
     42 
     43 define <8 x i16> @simple_absv_16(<8 x i16> %a) local_unnamed_addr {
        ; Test input: the vmaxsh intrinsic applied to %a and (0 - %a) on <8 x i16>.
        ; The default-prefix directives (pwr9) require four consecutive lines:
        ; mtvsrws, then vadduhm 2, 2, <reg>, then vabsduh 2, 2, <same reg>, then blr.
        ; The PWR8-prefixed directives (pwr8) require xxlxor, vsubuhm, vmaxsh and
        ; blr, in that order.
     44 entry:
     45   %sub.i = sub <8 x i16> zeroinitializer, %a
     46   %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)
     47   ret <8 x i16> %0
     48 ; CHECK-LABEL: simple_absv_16
     49 ; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
     50 ; CHECK-NEXT: vadduhm 2, 2, [[IMM:[0-9]+]]
     51 ; CHECK-NEXT: vabsduh 2, 2, [[IMM]]
     52 ; CHECK-NEXT: blr
     53 ; CHECK-PWR8-LABEL: simple_absv_16
     54 ; CHECK-PWR8: xxlxor
     55 ; CHECK-PWR8: vsubuhm
     56 ; CHECK-PWR8: vmaxsh
     57 ; CHECK-PWR8: blr
     58 }
     59 
     60 ; Function Attrs: nounwind readnone
        ; Test input: the vmaxsb intrinsic applied to %a and (0 - %a) on <16 x i8>.
        ; The default-prefix directives (pwr9) require four consecutive lines:
        ; xxspltib <reg>, 128, then vaddubm 2, 2, <reg>, then
        ; vabsdub 2, 2, <same reg>, then blr.
        ; The PWR8-prefixed directives (pwr8) require xxlxor, vsububm, vmaxsb and
        ; blr, in that order.
     61 define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr {
     62 entry:
     63   %sub.i = sub <16 x i8> zeroinitializer, %a
     64   %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)
     65   ret <16 x i8> %0
     66 ; CHECK-LABEL: simple_absv_8
     67 ; CHECK: xxspltib {{[0-9]+}}, 128
     68 ; CHECK-NEXT: vaddubm 2, 2, [[IMM:[0-9]+]]
     69 ; CHECK-NEXT: vabsdub 2, 2, [[IMM]]
     70 ; CHECK-NEXT: blr
     71 ; CHECK-PWR8-LABEL: simple_absv_8
     72 ; CHECK-PWR8: xxlxor
     73 ; CHECK-PWR8: vsububm
     74 ; CHECK-PWR8: vmaxsb
     75 ; CHECK-PWR8: blr
     76 }
     77 
     78 ; The select pattern can only be detected for v4i32.
     79 ; Function Attrs: norecurse nounwind readnone
        ; Test input written with a compare-and-select instead of an intrinsic:
        ; d = %a - %b (nsw); the result is d where d > -1 and (0 - d) otherwise.
        ; The default-prefix directives (pwr9) accept xvnegsp 34, 34 and
        ; xvnegsp 35, 35 in either order, followed on the next lines by
        ; vabsduw 2, 2, 3 and blr.
        ; The PWR8-prefixed directives (pwr8) require vsubuwm, xxlxor and blr, in
        ; that order.
     80 define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
     81 entry:
     82   %0 = sub nsw <4 x i32> %a, %b
     83   %1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
     84   %2 = sub <4 x i32> zeroinitializer, %0
     85   %3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2
     86   ret <4 x i32> %3
     87 ; CHECK-LABEL: sub_absv_32
     88 ; CHECK-DAG: xvnegsp 34, 34
     89 ; CHECK-DAG: xvnegsp 35, 35
     90 ; CHECK-NEXT: vabsduw 2, 2, 3
     91 ; CHECK-NEXT: blr
     92 ; CHECK-PWR8-LABEL: sub_absv_32
     93 ; CHECK-PWR8: vsubuwm
     94 ; CHECK-PWR8: xxlxor
     95 ; CHECK-PWR8: blr
     96 }
     97 
     98 ; FIXME: This does not produce the ISD::ABS that we are looking for.
     99 ; We should fix the missing canonicalization.
    100 ; We do manage to find the word version of ABS but not the halfword.
    101 ; Therefore, we end up doing more work than is required with a pair of abs for word
    102 ;  instead of just one for the halfword.
    103 ; Function Attrs: norecurse nounwind readnone
        ; Test input: both <8 x i16> operands are sign-extended to <8 x i32>,
        ; subtracted (nsw), passed through the same "d > -1 ? d : 0 - d" select as
        ; sub_absv_32, and the result is truncated back to <8 x i16>.
        ; The default-prefix directives (pwr9) require two vabsduw matches followed
        ; by blr, and forbid vabsduh anywhere between the label and that blr.
        ; The PWR8-prefixed directives (pwr8) require vsubuwm, xxlxor and blr, in
        ; that order.
    104 define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
    105 entry:
    106   %0 = sext <8 x i16> %a to <8 x i32>
    107   %1 = sext <8 x i16> %b to <8 x i32>
    108   %2 = sub nsw <8 x i32> %0, %1
    109   %3 = icmp sgt <8 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
    110   %4 = sub nsw <8 x i32> zeroinitializer, %2
    111   %5 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> %4
    112   %6 = trunc <8 x i32> %5 to <8 x i16>
    113   ret <8 x i16> %6
    114 ; CHECK-LABEL: sub_absv_16
    115 ; CHECK-NOT: vabsduh
    116 ; CHECK: vabsduw
    117 ; CHECK-NOT: vabsduh
    118 ; CHECK: vabsduw
    119 ; CHECK-NOT: vabsduh
    120 ; CHECK: blr
    121 ; CHECK-PWR8-LABEL: sub_absv_16
    122 ; CHECK-PWR8: vsubuwm
    123 ; CHECK-PWR8: xxlxor
    124 ; CHECK-PWR8: blr
    125 }
    126 
    127 ; FIXME: This does not produce ISD::ABS. This does not even vectorize correctly!
    128 ; This function should look like sub_absv_32 and sub_absv_16 except that the type is v16i8.
    129 ; Function Attrs: norecurse nounwind readnone
        ; Test input: a fully scalarized form of the subtract-and-select pattern on
        ; <16 x i8>. The same ten-instruction group is repeated for each lane
        ; 0 through 15: extract the lane from %a and from %b, zero-extend both to
        ; i32, subtract (nsw), select the difference when it is > -1 and
        ; (0 - difference) otherwise, truncate to i8, and insert into that lane of
        ; the result. The first insert starts from a constant vector whose lanes
        ; 0-9 are undef and lanes 10-15 are 0.
        ; The default-prefix directives (pwr9) require subf, then xor, then blr,
        ; and forbid vabsdub anywhere between the label and that blr.
        ; The PWR8-prefixed directives (pwr8) require subf, xor and blr, in that
        ; order.
    130 define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
    131 entry:
    132   %vecext = extractelement <16 x i8> %a, i32 0
    133   %conv = zext i8 %vecext to i32
    134   %vecext1 = extractelement <16 x i8> %b, i32 0
    135   %conv2 = zext i8 %vecext1 to i32
    136   %sub = sub nsw i32 %conv, %conv2
    137   %ispos = icmp sgt i32 %sub, -1
    138   %neg = sub nsw i32 0, %sub
    139   %0 = select i1 %ispos, i32 %sub, i32 %neg
    140   %conv3 = trunc i32 %0 to i8
    141   %vecins = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %conv3, i32 0
    142   %vecext4 = extractelement <16 x i8> %a, i32 1
    143   %conv5 = zext i8 %vecext4 to i32
    144   %vecext6 = extractelement <16 x i8> %b, i32 1
    145   %conv7 = zext i8 %vecext6 to i32
    146   %sub8 = sub nsw i32 %conv5, %conv7
    147   %ispos171 = icmp sgt i32 %sub8, -1
    148   %neg172 = sub nsw i32 0, %sub8
    149   %1 = select i1 %ispos171, i32 %sub8, i32 %neg172
    150   %conv10 = trunc i32 %1 to i8
    151   %vecins11 = insertelement <16 x i8> %vecins, i8 %conv10, i32 1
    152   %vecext12 = extractelement <16 x i8> %a, i32 2
    153   %conv13 = zext i8 %vecext12 to i32
    154   %vecext14 = extractelement <16 x i8> %b, i32 2
    155   %conv15 = zext i8 %vecext14 to i32
    156   %sub16 = sub nsw i32 %conv13, %conv15
    157   %ispos173 = icmp sgt i32 %sub16, -1
    158   %neg174 = sub nsw i32 0, %sub16
    159   %2 = select i1 %ispos173, i32 %sub16, i32 %neg174
    160   %conv18 = trunc i32 %2 to i8
    161   %vecins19 = insertelement <16 x i8> %vecins11, i8 %conv18, i32 2
    162   %vecext20 = extractelement <16 x i8> %a, i32 3
    163   %conv21 = zext i8 %vecext20 to i32
    164   %vecext22 = extractelement <16 x i8> %b, i32 3
    165   %conv23 = zext i8 %vecext22 to i32
    166   %sub24 = sub nsw i32 %conv21, %conv23
    167   %ispos175 = icmp sgt i32 %sub24, -1
    168   %neg176 = sub nsw i32 0, %sub24
    169   %3 = select i1 %ispos175, i32 %sub24, i32 %neg176
    170   %conv26 = trunc i32 %3 to i8
    171   %vecins27 = insertelement <16 x i8> %vecins19, i8 %conv26, i32 3
    172   %vecext28 = extractelement <16 x i8> %a, i32 4
    173   %conv29 = zext i8 %vecext28 to i32
    174   %vecext30 = extractelement <16 x i8> %b, i32 4
    175   %conv31 = zext i8 %vecext30 to i32
    176   %sub32 = sub nsw i32 %conv29, %conv31
    177   %ispos177 = icmp sgt i32 %sub32, -1
    178   %neg178 = sub nsw i32 0, %sub32
    179   %4 = select i1 %ispos177, i32 %sub32, i32 %neg178
    180   %conv34 = trunc i32 %4 to i8
    181   %vecins35 = insertelement <16 x i8> %vecins27, i8 %conv34, i32 4
    182   %vecext36 = extractelement <16 x i8> %a, i32 5
    183   %conv37 = zext i8 %vecext36 to i32
    184   %vecext38 = extractelement <16 x i8> %b, i32 5
    185   %conv39 = zext i8 %vecext38 to i32
    186   %sub40 = sub nsw i32 %conv37, %conv39
    187   %ispos179 = icmp sgt i32 %sub40, -1
    188   %neg180 = sub nsw i32 0, %sub40
    189   %5 = select i1 %ispos179, i32 %sub40, i32 %neg180
    190   %conv42 = trunc i32 %5 to i8
    191   %vecins43 = insertelement <16 x i8> %vecins35, i8 %conv42, i32 5
    192   %vecext44 = extractelement <16 x i8> %a, i32 6
    193   %conv45 = zext i8 %vecext44 to i32
    194   %vecext46 = extractelement <16 x i8> %b, i32 6
    195   %conv47 = zext i8 %vecext46 to i32
    196   %sub48 = sub nsw i32 %conv45, %conv47
    197   %ispos181 = icmp sgt i32 %sub48, -1
    198   %neg182 = sub nsw i32 0, %sub48
    199   %6 = select i1 %ispos181, i32 %sub48, i32 %neg182
    200   %conv50 = trunc i32 %6 to i8
    201   %vecins51 = insertelement <16 x i8> %vecins43, i8 %conv50, i32 6
    202   %vecext52 = extractelement <16 x i8> %a, i32 7
    203   %conv53 = zext i8 %vecext52 to i32
    204   %vecext54 = extractelement <16 x i8> %b, i32 7
    205   %conv55 = zext i8 %vecext54 to i32
    206   %sub56 = sub nsw i32 %conv53, %conv55
    207   %ispos183 = icmp sgt i32 %sub56, -1
    208   %neg184 = sub nsw i32 0, %sub56
    209   %7 = select i1 %ispos183, i32 %sub56, i32 %neg184
    210   %conv58 = trunc i32 %7 to i8
    211   %vecins59 = insertelement <16 x i8> %vecins51, i8 %conv58, i32 7
    212   %vecext60 = extractelement <16 x i8> %a, i32 8
    213   %conv61 = zext i8 %vecext60 to i32
    214   %vecext62 = extractelement <16 x i8> %b, i32 8
    215   %conv63 = zext i8 %vecext62 to i32
    216   %sub64 = sub nsw i32 %conv61, %conv63
    217   %ispos185 = icmp sgt i32 %sub64, -1
    218   %neg186 = sub nsw i32 0, %sub64
    219   %8 = select i1 %ispos185, i32 %sub64, i32 %neg186
    220   %conv66 = trunc i32 %8 to i8
    221   %vecins67 = insertelement <16 x i8> %vecins59, i8 %conv66, i32 8
    222   %vecext68 = extractelement <16 x i8> %a, i32 9
    223   %conv69 = zext i8 %vecext68 to i32
    224   %vecext70 = extractelement <16 x i8> %b, i32 9
    225   %conv71 = zext i8 %vecext70 to i32
    226   %sub72 = sub nsw i32 %conv69, %conv71
    227   %ispos187 = icmp sgt i32 %sub72, -1
    228   %neg188 = sub nsw i32 0, %sub72
    229   %9 = select i1 %ispos187, i32 %sub72, i32 %neg188
    230   %conv74 = trunc i32 %9 to i8
    231   %vecins75 = insertelement <16 x i8> %vecins67, i8 %conv74, i32 9
    232   %vecext76 = extractelement <16 x i8> %a, i32 10
    233   %conv77 = zext i8 %vecext76 to i32
    234   %vecext78 = extractelement <16 x i8> %b, i32 10
    235   %conv79 = zext i8 %vecext78 to i32
    236   %sub80 = sub nsw i32 %conv77, %conv79
    237   %ispos189 = icmp sgt i32 %sub80, -1
    238   %neg190 = sub nsw i32 0, %sub80
    239   %10 = select i1 %ispos189, i32 %sub80, i32 %neg190
    240   %conv82 = trunc i32 %10 to i8
    241   %vecins83 = insertelement <16 x i8> %vecins75, i8 %conv82, i32 10
    242   %vecext84 = extractelement <16 x i8> %a, i32 11
    243   %conv85 = zext i8 %vecext84 to i32
    244   %vecext86 = extractelement <16 x i8> %b, i32 11
    245   %conv87 = zext i8 %vecext86 to i32
    246   %sub88 = sub nsw i32 %conv85, %conv87
    247   %ispos191 = icmp sgt i32 %sub88, -1
    248   %neg192 = sub nsw i32 0, %sub88
    249   %11 = select i1 %ispos191, i32 %sub88, i32 %neg192
    250   %conv90 = trunc i32 %11 to i8
    251   %vecins91 = insertelement <16 x i8> %vecins83, i8 %conv90, i32 11
    252   %vecext92 = extractelement <16 x i8> %a, i32 12
    253   %conv93 = zext i8 %vecext92 to i32
    254   %vecext94 = extractelement <16 x i8> %b, i32 12
    255   %conv95 = zext i8 %vecext94 to i32
    256   %sub96 = sub nsw i32 %conv93, %conv95
    257   %ispos193 = icmp sgt i32 %sub96, -1
    258   %neg194 = sub nsw i32 0, %sub96
    259   %12 = select i1 %ispos193, i32 %sub96, i32 %neg194
    260   %conv98 = trunc i32 %12 to i8
    261   %vecins99 = insertelement <16 x i8> %vecins91, i8 %conv98, i32 12
    262   %vecext100 = extractelement <16 x i8> %a, i32 13
    263   %conv101 = zext i8 %vecext100 to i32
    264   %vecext102 = extractelement <16 x i8> %b, i32 13
    265   %conv103 = zext i8 %vecext102 to i32
    266   %sub104 = sub nsw i32 %conv101, %conv103
    267   %ispos195 = icmp sgt i32 %sub104, -1
    268   %neg196 = sub nsw i32 0, %sub104
    269   %13 = select i1 %ispos195, i32 %sub104, i32 %neg196
    270   %conv106 = trunc i32 %13 to i8
    271   %vecins107 = insertelement <16 x i8> %vecins99, i8 %conv106, i32 13
    272   %vecext108 = extractelement <16 x i8> %a, i32 14
    273   %conv109 = zext i8 %vecext108 to i32
    274   %vecext110 = extractelement <16 x i8> %b, i32 14
    275   %conv111 = zext i8 %vecext110 to i32
    276   %sub112 = sub nsw i32 %conv109, %conv111
    277   %ispos197 = icmp sgt i32 %sub112, -1
    278   %neg198 = sub nsw i32 0, %sub112
    279   %14 = select i1 %ispos197, i32 %sub112, i32 %neg198
    280   %conv114 = trunc i32 %14 to i8
    281   %vecins115 = insertelement <16 x i8> %vecins107, i8 %conv114, i32 14
    282   %vecext116 = extractelement <16 x i8> %a, i32 15
    283   %conv117 = zext i8 %vecext116 to i32
    284   %vecext118 = extractelement <16 x i8> %b, i32 15
    285   %conv119 = zext i8 %vecext118 to i32
    286   %sub120 = sub nsw i32 %conv117, %conv119
    287   %ispos199 = icmp sgt i32 %sub120, -1
    288   %neg200 = sub nsw i32 0, %sub120
    289   %15 = select i1 %ispos199, i32 %sub120, i32 %neg200
    290   %conv122 = trunc i32 %15 to i8
    291   %vecins123 = insertelement <16 x i8> %vecins115, i8 %conv122, i32 15
    292   ret <16 x i8> %vecins123
    293 ; CHECK-LABEL: sub_absv_8
    294 ; CHECK-NOT: vabsdub
    295 ; CHECK: subf
    296 ; CHECK-NOT: vabsdub
    297 ; CHECK: xor
    298 ; CHECK-NOT: vabsdub
    299 ; CHECK: blr
    300 ; CHECK-PWR8-LABEL: sub_absv_8
    301 ; CHECK-PWR8: subf
    302 ; CHECK-PWR8: xor
    303 ; CHECK-PWR8: blr
    304 }
    305 
    306 ; Function Attrs: nounwind readnone
        ; Test input: d = %a - %b on <4 x i32>, then the vmaxsw intrinsic applied
        ; to d and (0 - d).
        ; The default-prefix directives (pwr9) require vabsduw 2, 2, 3 with blr on
        ; the very next line.
        ; The PWR8-prefixed directives (pwr8) require xxlxor, vsubuwm, vmaxsw and
        ; blr, in that order.
    307 define <4 x i32> @sub_absv_vec_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
    308 entry:
    309   %sub = sub <4 x i32> %a, %b
    310   %sub.i = sub <4 x i32> zeroinitializer, %sub
    311   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub, <4 x i32> %sub.i)
    312   ret <4 x i32> %0
    313 ; CHECK-LABEL: sub_absv_vec_32
    314 ; CHECK: vabsduw 2, 2, 3
    315 ; CHECK-NEXT: blr
    316 ; CHECK-PWR8-LABEL: sub_absv_vec_32
    317 ; CHECK-PWR8: xxlxor
    318 ; CHECK-PWR8: vsubuwm
    319 ; CHECK-PWR8: vmaxsw
    320 ; CHECK-PWR8: blr
    321 }
    322 
    323 ; Function Attrs: nounwind readnone
        ; Test input: d = %a - %b on <8 x i16>, then the vmaxsh intrinsic applied
        ; to d and (0 - d).
        ; The default-prefix directives (pwr9) require vabsduh 2, 2, 3 with blr on
        ; the very next line.
        ; The PWR8-prefixed directives (pwr8) require xxlxor, vsubuhm, vmaxsh and
        ; blr, in that order.
    324 define <8 x i16> @sub_absv_vec_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
    325 entry:
    326   %sub = sub <8 x i16> %a, %b
    327   %sub.i = sub <8 x i16> zeroinitializer, %sub
    328   %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %sub, <8 x i16> %sub.i)
    329   ret <8 x i16> %0
    330 ; CHECK-LABEL: sub_absv_vec_16
    331 ; CHECK: vabsduh 2, 2, 3
    332 ; CHECK-NEXT: blr
    333 ; CHECK-PWR8-LABEL: sub_absv_vec_16
    334 ; CHECK-PWR8: xxlxor
    335 ; CHECK-PWR8: vsubuhm
    336 ; CHECK-PWR8: vmaxsh
    337 ; CHECK-PWR8: blr
    338 }
    339 
    340 ; Function Attrs: nounwind readnone
        ; Test input: d = %a - %b on <16 x i8>, then the vmaxsb intrinsic applied
        ; to d and (0 - d).
        ; The default-prefix directives (pwr9) require vabsdub 2, 2, 3 with blr on
        ; the very next line.
        ; The PWR8-prefixed directives (pwr8) require xxlxor, vsububm, vmaxsb and
        ; blr, in that order.
    341 define <16 x i8> @sub_absv_vec_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
    342 entry:
    343   %sub = sub <16 x i8> %a, %b
    344   %sub.i = sub <16 x i8> zeroinitializer, %sub
    345   %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %sub, <16 x i8> %sub.i)
    346   ret <16 x i8> %0
    347 ; CHECK-LABEL: sub_absv_vec_8
    348 ; CHECK: vabsdub 2, 2, 3
    349 ; CHECK-NEXT: blr
    350 ; CHECK-PWR8-LABEL: sub_absv_vec_8
    351 ; CHECK-PWR8: xxlxor
    352 ; CHECK-PWR8: vsububm
    353 ; CHECK-PWR8: vmaxsb
    354 ; CHECK-PWR8: blr
    355 }
    356 
    357 
    358 ; Function Attrs: nounwind readnone
        ; Declarations of the three AltiVec vmaxs* intrinsics called by the test
        ; functions in this file. Each takes two vectors of one type and returns a
        ; vector of that same type: vmaxsw on <4 x i32>, vmaxsh on <8 x i16>, and
        ; vmaxsb on <16 x i8>.
    359 declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
    360 
    361 ; Function Attrs: nounwind readnone
    362 declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)
    363 
    364 ; Function Attrs: nounwind readnone
    365 declare <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8>, <16 x i8>)
    366 
    367