// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
// RUN: -target-feature +v8.1a -S -emit-llvm -o - %s | FileCheck %s

// Clang CodeGen test for the ARMv8.1-A NEON rounding-doubling
// multiply-accumulate/subtract intrinsics (vqrdmlah* / vqrdmlsh*).
// Each function below calls one intrinsic and the FileCheck directives pin
// the LLVM IR Clang is expected to emit:
//   - a shufflevector splat (vector *_laneq_* forms) or an extractelement
//     (scalar *_lane_*/*_laneq_* forms) for the lane selection,
//   - a call to @llvm.aarch64.neon.sqrdmulh.* for the multiply step,
//   - a call to @llvm.aarch64.neon.sqadd.* (mlah) or
//     @llvm.aarch64.neon.sqsub.* (mlsh) for the accumulate step.
// NOTE(review): the "// CHECK" and "// RUN" comments are load-bearing test
// expectations matched textually by FileCheck — edit them only to track an
// intentional CodeGen change, never for style.

#include <arm_neon.h>

// ---- vqrdmlah: multiply, then saturating add into the accumulator ----

// Lane 7 of the <8 x i16> operand is splatted across all 4 result lanes.
// CHECK-LABEL: test_vqrdmlah_laneq_s16
int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlah_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlah_laneq_s32
int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlah_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlahq_laneq_s16
int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlahq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlahq_laneq_s32
int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlahq_laneq_s32(a, b, v, 3);
}

// Scalar 16-bit forms: per the CHECK lines, Clang widens to <4 x i16> by
// inserting each scalar into lane 0, calls the vector intrinsic, and
// extracts lane 0 of the result (there is no i16 scalar intrinsic).
// CHECK-LABEL: test_vqrdmlahh_s16
int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_s16(a, b, c);
}

// Scalar 32-bit forms use the i32 intrinsics directly — no vector round-trip.
// CHECK-LABEL: test_vqrdmlahs_s32
int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_s32(a, b, c);
}

// CHECK-LABEL: test_vqrdmlahh_lane_s16
int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: test_vqrdmlahs_lane_s32
int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: test_vqrdmlahh_laneq_s16
int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: test_vqrdmlahs_laneq_s32
int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_laneq_s32(a, b, c, 3);
}

// ---- vqrdmlsh: same shapes as vqrdmlah, but the accumulate step is a
// ---- saturating subtract (sqsub) instead of a saturating add (sqadd).

// CHECK-LABEL: test_vqrdmlsh_laneq_s16
int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlsh_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlsh_laneq_s32
int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlsh_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlshq_laneq_s16
int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlshq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlshq_laneq_s32
int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlshq_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlshh_s16
int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_s16(a, b, c);
}

// CHECK-LABEL: test_vqrdmlshs_s32
int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_s32(a, b, c);
}

// CHECK-LABEL: test_vqrdmlshh_lane_s16
int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: test_vqrdmlshs_lane_s32
int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: test_vqrdmlshh_laneq_s16
int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: test_vqrdmlshs_laneq_s32
int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_laneq_s32(a, b, c, 3);
}