1 ; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s 2 3 ;----------------------------------------------------------------------------- 4 ; RDMA Vector 5 6 declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) 7 declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) 8 declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) 9 declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) 10 11 declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) 12 declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) 13 declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) 14 declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) 15 16 declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) 17 declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) 18 declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) 19 declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) 20 21 define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { 22 ; CHECK-LABEL: test_vqrdmlah_v4i16: 23 %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) 24 %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod) 25 ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 26 ret <4 x i16> %retval 27 } 28 29 define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { 30 ; CHECK-LABEL: test_vqrdmlah_v8i16: 31 %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) 32 %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) 33 ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} 34 ret <8 x i16> %retval 35 } 36 37 define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { 38 ; CHECK-LABEL: test_vqrdmlah_v2i32: 39 %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) 40 %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) 41 ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 42 ret <2 x i32> %retval 43 } 44 45 define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { 46 ; CHECK-LABEL: test_vqrdmlah_v4i32: 47 %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) 48 %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) 49 ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} 50 ret <4 x i32> %retval 51 } 52 53 define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { 54 ; CHECK-LABEL: test_vqrdmlsh_v4i16: 55 %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) 56 %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) 57 ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 58 ret <4 x i16> %retval 59 } 60 61 define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { 62 ; CHECK-LABEL: test_vqrdmlsh_v8i16: 63 %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) 64 %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) 65 ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} 66 ret <8 x i16> %retval 67 } 68 69 define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { 70 ; CHECK-LABEL: test_vqrdmlsh_v2i32: 71 %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) 72 %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) 73 ; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 74 ret <2 x i32> %retval 75 } 76 77 define <4 x i32> @test_vqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { 78 ; CHECK-LABEL: test_vqrdmlsh_v4i32: 79 %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) 80 %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) 81 ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} 82 ret <4 x i32> %retval 83 } 84 85 ;----------------------------------------------------------------------------- 86 ; RDMA Scalar 87 88 define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { 89 ; CHECK-LABEL: test_vqrdmlah_lane_s16: 90 entry: 91 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 92 %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) 93 %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod) 94 ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] 95 ret <4 x i16> %retval 96 } 97 98 define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { 99 ; CHECK-LABEL: test_vqrdmlahq_lane_s16: 100 entry: 101 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 102 %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) 103 %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) 104 ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] 105 ret <8 x i16> %retval 106 } 107 108 define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { 109 ; CHECK-LABEL: test_vqrdmlah_lane_s32: 110 entry: 111 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 112 %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) 113 %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) 114 ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] 115 ret <2 x i32> %retval 116 } 117 118 define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { 119 ; CHECK-LABEL: test_vqrdmlahq_lane_s32: 120 entry: 121 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 122 %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) 123 %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) 124 ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] 125 ret <4 x i32> %retval 126 } 127 128 define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { 129 ; CHECK-LABEL: test_vqrdmlsh_lane_s16: 130 entry: 131 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 132 %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) 133 %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) 134 ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] 135 ret <4 x i16> %retval 136 } 137 138 define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { 139 ; CHECK-LABEL: test_vqrdmlshq_lane_s16: 140 entry: 141 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 142 %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) 143 %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) 144 ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] 145 ret <8 x i16> %retval 146 } 147 148 define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { 149 ; CHECK-LABEL: test_vqrdmlsh_lane_s32: 150 entry: 151 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 152 %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) 153 %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) 154 ; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] 155 ret <2 x i32> %retval 156 } 157 158 define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { 159 ; CHECK-LABEL: test_vqrdmlshq_lane_s32: 160 entry: 161 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 162 %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) 163 %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) 164 ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] 165 ret <4 x i32> %retval 166 } 167