; Test various target-specific DAG combiner patterns.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; Check that an extraction followed by a truncation is effectively treated
; as a bitcast.
;
; Both stores are expected to be single-byte element stores taken from the
; register that holds the vector add result.  The expected byte indices are
; 3 and 15, i.e. the last byte of 32-bit elements 0 and 3 respectively.
define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  ; Pull out the first and last 32-bit elements of the sum.
  %elem1 = extractelement <4 x i32> %add, i32 0
  %elem2 = extractelement <4 x i32> %add, i32 3
  ; Keep only the low 8 bits of each extracted element.
  %trunc1 = trunc i32 %elem1 to i8
  %trunc2 = trunc i32 %elem2 to i8
  store i8 %trunc1, i8 *%ptr1
  store i8 %trunc2, i8 *%ptr2
  ret void
}

; Test a case where a pack-type shuffle can be eliminated.
;
; The shuffle mask <1, 3, 5, 7> selects the odd-numbered 32-bit elements of
; the two sums.  The halfword extracts are expected to read the two add
; results directly (halfword 3 of the first sum, halfword 7 of the second),
; and no "vpk" should be emitted for the shuffle.
define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: f2:
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
; CHECK: br %r14
  %add1 = add <4 x i32> %v1, %v2
  %add2 = add <4 x i32> %v2, %v3
  %shuffle = shufflevector <4 x i32> %add1, <4 x i32> %add2,
                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; Reinterpret the shuffled words as eight halfwords before extracting.
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 1
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; ...and again in a case where there's also a splat and a bitcast.
; The second shuffle operand is a splat of doubleword 0 of %v3 (mask <0, 0>)
; reinterpreted as four words.  The directives below expect neither a
; "vrepg" nor a "vpk": one halfword is read from the add result (index 6)
; and the other straight from %v28 (index 3), presumably the incoming %v3.
define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f3:
; CHECK-NOT: vrepg
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  ; Replicate doubleword 0 of %v3 into both doubleword positions.
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  ; Pack-style mask: odd-numbered words of %add followed by those of the splat.
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 2
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; ...and again with a merge low instead of a pack.
;
; Same structure as the previous function, but the shuffle mask
; <2, 6, 3, 7> interleaves the last two words of each operand.  The
; directives below expect no "vrepg" and no "vmr", with the halfwords read
; from the add result (index 6) and from %v28 (index 3).
define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f4:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  ; Replicate doubleword 0 of %v3 into both doubleword positions.
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  ; Merge-low-style mask: words 2 and 3 of each operand, interleaved.
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 4
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; ...and again with a merge high.
; The shuffle mask <0, 4, 1, 5> interleaves the first two words of each
; operand.  The directives below expect no "vrepg" and no "vmr", with the
; halfwords read from the add result (index 2) and from %v28 (index 3),
; presumably the incoming %v3.
define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f5:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  ; Replicate doubleword 0 of %v3 into both doubleword positions.
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  ; Merge-high-style mask: words 0 and 1 of each operand, interleaved.
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 4
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; Test a case where an unpack high can be eliminated from the usual
; load-extend sequence.
;
; Each extracted halfword is truncated straight back to a byte, so the
; stores are expected to use byte elements 1, 2 and 7 of the replicated
; load result (the same element numbers that are extracted from the
; sign-extended vector), and no "vup" should be emitted.
define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
; CHECK-LABEL: f6:
; CHECK: vlrepg [[REG:%v[0-9]+]], 0(%r2)
; CHECK-NOT: vup
; CHECK-DAG: vsteb [[REG]], 0(%r3), 1
; CHECK-DAG: vsteb [[REG]], 0(%r4), 2
; CHECK-DAG: vsteb [[REG]], 0(%r5), 7
; CHECK: br %r14
  %vec = load <8 x i8>, <8 x i8> *%ptr1
  ; Sign-extend every byte to a halfword.
  %ext = sext <8 x i8> %vec to <8 x i16>
  %elem1 = extractelement <8 x i16> %ext, i32 1
  %elem2 = extractelement <8 x i16> %ext, i32 2
  %elem3 = extractelement <8 x i16> %ext, i32 7
  ; Truncating back to i8 discards exactly the bits the extension added.
  %trunc1 = trunc i16 %elem1 to i8
  %trunc2 = trunc i16 %elem2 to i8
  %trunc3 = trunc i16 %elem3 to i8
  store i8 %trunc1, i8 *%ptr2
  store i8 %trunc2, i8 *%ptr3
  store i8 %trunc3, i8 *%ptr4
  ret void
}

; ...and again with a bitcast in between.
; Four bytes are sign-extended to words and then viewed as eight halfwords.
; Halfwords 1, 3 and 7 are the second halves of words 0, 1 and 3, and the
; stores are expected to use byte elements 0, 1 and 3 of the replicated load
; result, with no "vup" emitted.
define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
; CHECK-LABEL: f7:
; CHECK: vlrepf [[REG:%v[0-9]+]], 0(%r2)
; CHECK-NOT: vup
; CHECK-DAG: vsteb [[REG]], 0(%r3), 0
; CHECK-DAG: vsteb [[REG]], 0(%r4), 1
; CHECK-DAG: vsteb [[REG]], 0(%r5), 3
; CHECK: br %r14
  %vec = load <4 x i8>, <4 x i8> *%ptr1
  ; Sign-extend every byte to a word, then reinterpret as halfwords.
  %ext = sext <4 x i8> %vec to <4 x i32>
  %bitcast = bitcast <4 x i32> %ext to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 1
  %elem2 = extractelement <8 x i16> %bitcast, i32 3
  %elem3 = extractelement <8 x i16> %bitcast, i32 7
  ; Keep only the low 8 bits of each extracted halfword.
  %trunc1 = trunc i16 %elem1 to i8
  %trunc2 = trunc i16 %elem2 to i8
  %trunc3 = trunc i16 %elem3 to i8
  store i8 %trunc1, i8 *%ptr2
  store i8 %trunc2, i8 *%ptr3
  store i8 %trunc3, i8 *%ptr4
  ret void
}