Home | History | Annotate | Download | only in SystemZ
      1 ; Test various target-specific DAG combiner patterns.
      2 ;
      3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
      4 
      5 ; Extracting an i32 element and truncating it to i8 should behave like a
      6 ; bitcast: each byte is stored directly out of the vector register.
      7 define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
      8 ; CHECK-LABEL: f1:
      9 ; CHECK: vaf [[SUM:%v[0-9]+]], %v24, %v26
     10 ; CHECK-DAG: vsteb [[SUM]], 0(%r2), 3
     11 ; CHECK-DAG: vsteb [[SUM]], 0(%r3), 15
     12 ; CHECK: br %r14
     13   %sum = add <4 x i32> %v1, %v2
     14   %word0 = extractelement <4 x i32> %sum, i32 0
     15   %word3 = extractelement <4 x i32> %sum, i32 3
     16   %byte0 = trunc i32 %word0 to i8        ; expected as vector byte 3
     17   %byte3 = trunc i32 %word3 to i8        ; expected as vector byte 15
     18   store i8 %byte0, i8 *%ptr1
     19   store i8 %byte3, i8 *%ptr2
     20   ret void
     21 }
     22 
     23 ; When only single halfwords of a pack-style shuffle are used, no vpk is needed.
     24 define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
     25 ; CHECK-LABEL: f2:
     26 ; CHECK-NOT: vpk
     27 ; CHECK-DAG: vaf [[SUM12:%v[0-9]+]], %v24, %v26
     28 ; CHECK-DAG: vaf [[SUM23:%v[0-9]+]], %v26, %v28
     29 ; CHECK-DAG: vlgvh {{%r[0-5]}}, [[SUM12]], 3
     30 ; CHECK-DAG: vlgvh {{%r[0-5]}}, [[SUM23]], 7
     31 ; CHECK: br %r14
     32   %sum12 = add <4 x i32> %v1, %v2
     33   %sum23 = add <4 x i32> %v2, %v3
     34   %packed = shufflevector <4 x i32> %sum12, <4 x i32> %sum23,
     35                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
     36   %halves = bitcast <4 x i32> %packed to <8 x i16>
     37   %h1 = extractelement <8 x i16> %halves, i32 1   ; within %sum12 word 1
     38   %h7 = extractelement <8 x i16> %halves, i32 7   ; within %sum23 word 3
     39   %total = add i16 %h1, %h7
     40   ret i16 %total
     41 }
     42 
     43 ; As f2, but one shuffle input is a doubleword splat viewed through a bitcast.
     44 define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
     45 ; CHECK-LABEL: f3:
     46 ; CHECK-NOT: vrepg
     47 ; CHECK-NOT: vpk
     48 ; CHECK-DAG: vaf [[SUM:%v[0-9]+]], %v24, %v26
     49 ; CHECK-DAG: vlgvh {{%r[0-5]}}, [[SUM]], 6
     50 ; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
     51 ; CHECK: br %r14
     52   %sum = add <4 x i32> %v1, %v2
     53   %dsplat = shufflevector <2 x i64> %v3, <2 x i64> undef,
     54                           <2 x i32> <i32 0, i32 0>
     55   %swords = bitcast <2 x i64> %dsplat to <4 x i32>
     56   %packed = shufflevector <4 x i32> %sum, <4 x i32> %swords,
     57                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
     58   %halves = bitcast <4 x i32> %packed to <8 x i16>
     59   %h2 = extractelement <8 x i16> %halves, i32 2   ; within %sum word 3
     60   %h7 = extractelement <8 x i16> %halves, i32 7   ; within the %v3 splat
     61   %total = add i16 %h2, %h7
     62   ret i16 %total
     63 }
     64 
     65 ; As f3, but the shuffle is a merge-low pattern, so no vmr should appear.
     66 define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
     67 ; CHECK-LABEL: f4:
     68 ; CHECK-NOT: vrepg
     69 ; CHECK-NOT: vmr
     70 ; CHECK-DAG: vaf [[SUM:%v[0-9]+]], %v24, %v26
     71 ; CHECK-DAG: vlgvh {{%r[0-5]}}, [[SUM]], 6
     72 ; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
     73 ; CHECK: br %r14
     74   %sum = add <4 x i32> %v1, %v2
     75   %dsplat = shufflevector <2 x i64> %v3, <2 x i64> undef,
     76                           <2 x i32> <i32 0, i32 0>
     77   %swords = bitcast <2 x i64> %dsplat to <4 x i32>
     78   %merged = shufflevector <4 x i32> %sum, <4 x i32> %swords,
     79                           <4 x i32> <i32 2, i32 6, i32 3, i32 7>
     80   %halves = bitcast <4 x i32> %merged to <8 x i16>
     81   %h4 = extractelement <8 x i16> %halves, i32 4   ; within %sum word 3
     82   %h7 = extractelement <8 x i16> %halves, i32 7   ; within the %v3 splat
     83   %total = add i16 %h4, %h7
     84   ret i16 %total
     85 }
     86 
     87 ; As f4, but with a merge-high shuffle mask.
     88 define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
     89 ; CHECK-LABEL: f5:
     90 ; CHECK-NOT: vrepg
     91 ; CHECK-NOT: vmr
     92 ; CHECK-DAG: vaf [[SUM:%v[0-9]+]], %v24, %v26
     93 ; CHECK-DAG: vlgvh {{%r[0-5]}}, [[SUM]], 2
     94 ; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
     95 ; CHECK: br %r14
     96   %sum = add <4 x i32> %v1, %v2
     97   %dsplat = shufflevector <2 x i64> %v3, <2 x i64> undef,
     98                           <2 x i32> <i32 0, i32 0>
     99   %swords = bitcast <2 x i64> %dsplat to <4 x i32>
    100   %merged = shufflevector <4 x i32> %sum, <4 x i32> %swords,
    101                           <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    102   %halves = bitcast <4 x i32> %merged to <8 x i16>
    103   %h4 = extractelement <8 x i16> %halves, i32 4   ; within %sum word 1
    104   %h7 = extractelement <8 x i16> %halves, i32 7   ; within the %v3 splat
    105   %total = add i16 %h4, %h7
    106   ret i16 %total
    107 }
    108 
    109 ; Sign-extending a loaded vector usually involves an unpack-high; because
    110 ; only truncated elements are stored here, no vup instruction is expected.
    111 define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
    112 ; CHECK-LABEL: f6:
    113 ; CHECK: vlrepg [[DATA:%v[0-9]+]], 0(%r2)
    114 ; CHECK-NOT: vup
    115 ; CHECK-DAG: vsteb [[DATA]], 0(%r3), 1
    116 ; CHECK-DAG: vsteb [[DATA]], 0(%r4), 2
    117 ; CHECK-DAG: vsteb [[DATA]], 0(%r5), 7
    118 ; CHECK: br %r14
    119   %bytes = load <8 x i8>, <8 x i8> *%ptr1
    120   %wide = sext <8 x i8> %bytes to <8 x i16>
    121   %w1 = extractelement <8 x i16> %wide, i32 1
    122   %w2 = extractelement <8 x i16> %wide, i32 2
    123   %w7 = extractelement <8 x i16> %wide, i32 7
    124   %b1 = trunc i16 %w1 to i8              ; undoes the sext: loaded byte 1
    125   %b2 = trunc i16 %w2 to i8              ; undoes the sext: loaded byte 2
    126   %b7 = trunc i16 %w7 to i8              ; undoes the sext: loaded byte 7
    127   store i8 %b1, i8 *%ptr2
    128   store i8 %b2, i8 *%ptr3
    129   store i8 %b7, i8 *%ptr4
    130   ret void
    131 }
    132 
    133 ; As f6, but with a bitcast in between the extension and the extractions.
    134 define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
    135 ; CHECK-LABEL: f7:
    136 ; CHECK: vlrepf [[DATA:%v[0-9]+]], 0(%r2)
    137 ; CHECK-NOT: vup
    138 ; CHECK-DAG: vsteb [[DATA]], 0(%r3), 0
    139 ; CHECK-DAG: vsteb [[DATA]], 0(%r4), 1
    140 ; CHECK-DAG: vsteb [[DATA]], 0(%r5), 3
    141 ; CHECK: br %r14
    142   %bytes = load <4 x i8>, <4 x i8> *%ptr1
    143   %words = sext <4 x i8> %bytes to <4 x i32>
    144   %halves = bitcast <4 x i32> %words to <8 x i16>
    145   %h1 = extractelement <8 x i16> %halves, i32 1   ; within %words element 0
    146   %h3 = extractelement <8 x i16> %halves, i32 3   ; within %words element 1
    147   %h7 = extractelement <8 x i16> %halves, i32 7   ; within %words element 3
    148   %b0 = trunc i16 %h1 to i8              ; expected: loaded byte 0
    149   %b1 = trunc i16 %h3 to i8              ; expected: loaded byte 1
    150   %b3 = trunc i16 %h7 to i8              ; expected: loaded byte 3
    151   store i8 %b0, i8 *%ptr2
    152   store i8 %b1, i8 *%ptr3
    153   store i8 %b3, i8 *%ptr4
    154   ret void
    155 }
    156