1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s 2 3 // Test new aarch64 intrinsics and types 4 5 #include <arm_neon.h> 6 7 // CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 { 8 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0 9 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1 10 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]] 11 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]] 12 // CHECK: ret <2 x float> [[ADD_I]] 13 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { 14 return vmla_n_f32(a, b, c); 15 } 16 17 // CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 { 18 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0 19 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 20 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2 21 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3 22 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]] 23 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]] 24 // CHECK: ret <4 x float> [[ADD_I]] 25 float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { 26 return vmlaq_n_f32(a, b, c); 27 } 28 29 // CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 { 30 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 31 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 32 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]] 33 // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]] 34 // CHECK: ret <2 x double> [[ADD_I]] 35 float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 36 return vmlaq_n_f64(a, b, c); 37 } 38 39 // CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 { 40 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0 41 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 42 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2 43 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3 44 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]] 45 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]] 46 // CHECK: ret <4 x float> [[SUB_I]] 47 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { 48 return vmlsq_n_f32(a, b, c); 49 } 50 51 // CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 { 52 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0 53 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1 54 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]] 55 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]] 56 // CHECK: ret <2 x float> [[SUB_I]] 57 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { 58 return vmls_n_f32(a, b, c); 59 } 60 61 // CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 { 62 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 63 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 64 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]] 65 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]] 66 // CHECK: ret <2 x double> [[SUB_I]] 67 float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 68 return vmlsq_n_f64(a, b, c); 69 } 70 71 // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 72 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer 73 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 74 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] 75 // CHECK: ret <2 x float> [[ADD]] 76 float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 77 return vmla_lane_f32(a, b, v, 0); 78 } 79 80 // CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 81 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer 82 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 83 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] 84 // CHECK: ret <4 x float> [[ADD]] 85 float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 86 return vmlaq_lane_f32(a, b, v, 0); 87 } 88 89 // CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 90 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer 91 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 92 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] 93 // CHECK: ret <2 x float> [[ADD]] 94 float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 95 return vmla_laneq_f32(a, b, v, 0); 96 } 97 98 // CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 99 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer 100 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 101 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] 102 // CHECK: ret <4 x float> [[ADD]] 103 float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 104 return vmlaq_laneq_f32(a, b, v, 0); 105 } 106 107 // CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 108 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer 109 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 110 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] 111 // CHECK: ret <2 x float> [[SUB]] 112 float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 113 return vmls_lane_f32(a, b, v, 0); 114 } 115 116 // CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 117 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer 118 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 119 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] 120 // CHECK: ret <4 x float> [[SUB]] 121 float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 122 return vmlsq_lane_f32(a, b, v, 0); 123 } 124 125 // CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 126 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer 127 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 128 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] 129 // CHECK: ret <2 x float> [[SUB]] 130 float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 131 return vmls_laneq_f32(a, b, v, 0); 132 } 133 134 // CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 135 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer 136 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 137 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] 138 // CHECK: ret <4 x float> [[SUB]] 139 float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 140 return vmlsq_laneq_f32(a, b, v, 0); 141 } 142 143 // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 144 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> 145 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 146 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] 147 // CHECK: ret <2 x float> [[ADD]] 148 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 149 return vmla_lane_f32(a, b, v, 1); 150 } 151 152 // CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 153 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 154 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 155 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] 156 // CHECK: ret <4 x float> [[ADD]] 157 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 158 return vmlaq_lane_f32(a, b, v, 1); 159 } 160 161 // CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 162 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> 163 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 164 // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] 165 // CHECK: ret <2 x float> [[ADD]] 166 float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 167 return vmla_laneq_f32(a, b, v, 3); 168 } 169 170 // CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 171 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 172 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 173 // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] 174 // CHECK: ret <4 x float> [[ADD]] 175 float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 176 return vmlaq_laneq_f32(a, b, v, 3); 177 } 178 179 // CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { 180 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> 181 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 182 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] 183 // CHECK: ret <2 x float> [[SUB]] 184 float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { 185 return vmls_lane_f32(a, b, v, 1); 186 } 187 188 // CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { 189 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 190 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 191 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] 192 // CHECK: ret <4 x float> [[SUB]] 193 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { 194 return vmlsq_lane_f32(a, b, v, 1); 195 } 196 // CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { 197 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> 198 // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] 199 // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] 200 // CHECK: ret <2 x float> [[SUB]] 201 float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 202 return vmls_laneq_f32(a, b, v, 3); 203 } 204 205 // CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { 206 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 207 // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] 208 // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] 209 // CHECK: ret <4 x float> [[SUB]] 210 float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 211 return vmlsq_laneq_f32(a, b, v, 3); 212 } 213 214 // CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 { 215 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 216 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 217 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 218 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 219 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8> 220 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 221 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 222 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 223 // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2 224 // CHECK: ret <2 x double> [[TMP6]] 225 float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 226 return vfmaq_n_f64(a, b, c); 227 } 228 229 // CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 { 230 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b 231 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 232 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 233 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 234 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> 235 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8> 236 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 237 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 238 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 239 // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2 240 // CHECK: ret <2 x double> [[TMP6]] 241 float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { 242 return vfmsq_n_f64(a, b, c); 243 } 244