Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
      2 
      3 // Test new aarch64 intrinsics and types
      4 
      5 #include <arm_neon.h>
      6 
      7 // CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
      8 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
      9 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
     10 // CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
     11 // CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
     12 // CHECK:   ret <2 x float> [[ADD_I]]
     13 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
          // Multiply-accumulate by scalar. The directives above expect c to be
          // splatted into a <2 x float> with two insertelement ops, followed by
          // a separate fmul (b * splat) and fadd (a + product).
     14   return vmla_n_f32(a, b, c);
     15 }
     16 
     17 // CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
     18 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
     19 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
     20 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
     21 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
     22 // CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
     23 // CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
     24 // CHECK:   ret <4 x float> [[ADD_I]]
     25 float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
          // <4 x float> form of multiply-accumulate by scalar. The directives
          // above expect four insertelement ops building the splat of c, then
          // fmul (b * splat) and fadd (a + product).
     26   return vmlaq_n_f32(a, b, c);
     27 }
     28 
     29 // CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
     30 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
     31 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
     32 // CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
     33 // CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]]
     34 // CHECK:   ret <2 x double> [[ADD_I]]
     35 float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // <2 x double> form of multiply-accumulate by scalar. The directives
          // above expect two insertelement ops building the splat of c, then
          // fmul (b * splat) and fadd (a + product).
     36   return vmlaq_n_f64(a, b, c);
     37 }
     38 
     39 // CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
     40 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
     41 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
     42 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
     43 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
     44 // CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
     45 // CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
     46 // CHECK:   ret <4 x float> [[SUB_I]]
     47 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
          // <4 x float> multiply-subtract by scalar. The directives above expect
          // four insertelement ops building the splat of c, then fmul
          // (b * splat) and fsub (a - product).
     48   return vmlsq_n_f32(a, b, c);
     49 }
     50 
     51 // CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
     52 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
     53 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
     54 // CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
     55 // CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
     56 // CHECK:   ret <2 x float> [[SUB_I]]
     57 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
          // <2 x float> multiply-subtract by scalar. The directives above expect
          // two insertelement ops building the splat of c, then fmul
          // (b * splat) and fsub (a - product).
     58   return vmls_n_f32(a, b, c);
     59 }
     60 
     61 // CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
     62 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
     63 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
     64 // CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
     65 // CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]]
     66 // CHECK:   ret <2 x double> [[SUB_I]]
     67 float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // <2 x double> multiply-subtract by scalar. The directives above
          // expect two insertelement ops building the splat of c, then fmul
          // (b * splat) and fsub (a - product).
     68   return vmlsq_n_f64(a, b, c);
     69 }
     70 
     71 // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
     72 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
     73 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
     74 // CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
     75 // CHECK:   ret <2 x float> [[ADD]]
     76 float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Multiply-accumulate by lane 0 of a <2 x float> v. The directives
          // above expect a shufflevector of v with a zeroinitializer mask
          // (<2 x i32>), then fmul (b * shuffle) and fadd (a + product).
     77   return vmla_lane_f32(a, b, v, 0);
     78 }
     79 
     80 // CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
     81 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
     82 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
     83 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
     84 // CHECK:   ret <4 x float> [[ADD]]
     85 float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Lane 0 of a <2 x float> v applied to <4 x float> operands. The
          // directives above expect a shufflevector of v with a <4 x i32>
          // zeroinitializer mask, then fmul (b * shuffle) and fadd (a + product).
     86   return vmlaq_lane_f32(a, b, v, 0);
     87 }
     88 
     89 // CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
     90 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
     91 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
     92 // CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
     93 // CHECK:   ret <2 x float> [[ADD]]
     94 float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Lane 0 of a <4 x float> v applied to <2 x float> operands. The
          // directives above expect a shufflevector of v with a <2 x i32>
          // zeroinitializer mask, then fmul (b * shuffle) and fadd (a + product).
     95   return vmla_laneq_f32(a, b, v, 0);
     96 }
     97 
     98 // CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
     99 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
    100 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
    101 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
    102 // CHECK:   ret <4 x float> [[ADD]]
    103 float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Lane 0 of a <4 x float> v applied to <4 x float> operands. The
          // directives above expect a shufflevector of v with a <4 x i32>
          // zeroinitializer mask, then fmul (b * shuffle) and fadd (a + product).
    104   return vmlaq_laneq_f32(a, b, v, 0);
    105 }
    106 
    107 // CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
    108 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
    109 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
    110 // CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
    111 // CHECK:   ret <2 x float> [[SUB]]
    112 float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Multiply-subtract by lane 0 of a <2 x float> v. The directives above
          // expect a shufflevector of v with a <2 x i32> zeroinitializer mask,
          // then fmul (b * shuffle) and fsub (a - product).
    113   return vmls_lane_f32(a, b, v, 0);
    114 }
    115 
    116 // CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
    117 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
    118 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
    119 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
    120 // CHECK:   ret <4 x float> [[SUB]]
    121 float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Lane 0 of a <2 x float> v applied to <4 x float> operands. The
          // directives above expect a shufflevector of v with a <4 x i32>
          // zeroinitializer mask, then fmul (b * shuffle) and fsub (a - product).
    122   return vmlsq_lane_f32(a, b, v, 0);
    123 }
    124 
    125 // CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
    126 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
    127 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
    128 // CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
    129 // CHECK:   ret <2 x float> [[SUB]]
    130 float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Lane 0 of a <4 x float> v applied to <2 x float> operands. The
          // directives above expect a shufflevector of v with a <2 x i32>
          // zeroinitializer mask, then fmul (b * shuffle) and fsub (a - product).
    131   return vmls_laneq_f32(a, b, v, 0);
    132 }
    133 
    134 // CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
    135 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
    136 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
    137 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
    138 // CHECK:   ret <4 x float> [[SUB]]
    139 float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Lane 0 of a <4 x float> v applied to <4 x float> operands. The
          // directives above expect a shufflevector of v with a <4 x i32>
          // zeroinitializer mask, then fmul (b * shuffle) and fsub (a - product).
    140   return vmlsq_laneq_f32(a, b, v, 0);
    141 }
    142 
    143 // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
    144 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
    145 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
    146 // CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
    147 // CHECK:   ret <2 x float> [[ADD]]
    148 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Same as the lane-0 variant but selecting lane 1 of the <2 x float>
          // v: the directives above expect a shuffle mask of <i32 1, i32 1>,
          // then fmul (b * shuffle) and fadd (a + product).
    149   return vmla_lane_f32(a, b, v, 1);
    150 }
    151 
    152 // CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
    153 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    154 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
    155 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
    156 // CHECK:   ret <4 x float> [[ADD]]
    157 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Lane 1 of a <2 x float> v applied to <4 x float> operands: the
          // directives above expect a four-element shuffle mask of all 1s,
          // then fmul (b * shuffle) and fadd (a + product).
    158   return vmlaq_lane_f32(a, b, v, 1);
    159 }
    160 
    161 // CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
    162 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
    163 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
    164 // CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
    165 // CHECK:   ret <2 x float> [[ADD]]
    166 float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Lane 3 of a <4 x float> v applied to <2 x float> operands: the
          // directives above expect a shuffle mask of <i32 3, i32 3>, then
          // fmul (b * shuffle) and fadd (a + product).
    167   return vmla_laneq_f32(a, b, v, 3);
    168 }
    169 
    170 // CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
    171 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
    172 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
    173 // CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
    174 // CHECK:   ret <4 x float> [[ADD]]
    175 float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Lane 3 of a <4 x float> v applied to <4 x float> operands: the
          // directives above expect a four-element shuffle mask of all 3s,
          // then fmul (b * shuffle) and fadd (a + product).
    176   return vmlaq_laneq_f32(a, b, v, 3);
    177 }
    178 
    179 // CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
    180 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
    181 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
    182 // CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
    183 // CHECK:   ret <2 x float> [[SUB]]
    184 float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Multiply-subtract by lane 1 of the <2 x float> v: the directives
          // above expect a shuffle mask of <i32 1, i32 1>, then fmul
          // (b * shuffle) and fsub (a - product).
    185   return vmls_lane_f32(a, b, v, 1);
    186 }
    187 
    188 // CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
    189 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    190 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
    191 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
    192 // CHECK:   ret <4 x float> [[SUB]]
    193 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Lane 1 of a <2 x float> v applied to <4 x float> operands: the
          // directives above expect a four-element shuffle mask of all 1s,
          // then fmul (b * shuffle) and fsub (a - product).
    194   return vmlsq_lane_f32(a, b, v, 1);
    195 }
    196 // CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
    197 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
    198 // CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
    199 // CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
    200 // CHECK:   ret <2 x float> [[SUB]]
    201 float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Lane 3 of a <4 x float> v applied to <2 x float> operands: the
          // directives above expect a shuffle mask of <i32 3, i32 3>, then
          // fmul (b * shuffle) and fsub (a - product).
    202   return vmls_laneq_f32(a, b, v, 3);
    203 }
    204 
    205 // CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
    206 // CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
    207 // CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
    208 // CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
    209 // CHECK:   ret <4 x float> [[SUB]]
    210 float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Lane 3 of a <4 x float> v applied to <4 x float> operands: the
          // directives above expect a four-element shuffle mask of all 3s,
          // then fmul (b * shuffle) and fsub (a - product).
    211   return vmlsq_laneq_f32(a, b, v, 3);
    212 }
    213 
    214 // CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
    215 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
    216 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
    217 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    218 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
    219 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
    220 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    221 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
    222 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
    223 // CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
    224 // CHECK:   ret <2 x double> [[TMP6]]
    225 float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // Fused multiply-add by scalar. Unlike the vmla tests, the directives
          // above expect a call to @llvm.fma.v2f64 rather than fmul + fadd.
          // Each operand is bitcast to <16 x i8> and back first, and the call
          // operands are ordered (b, splat(c), a), i.e. the accumulator is last.
    226   return vfmaq_n_f64(a, b, c);
    227 }
    228 
    229 // CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
    230 // CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
    231 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
    232 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
    233 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    234 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
    235 // CHECK:   [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
    236 // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    237 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
    238 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
    239 // CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
    240 // CHECK:   ret <2 x double> [[TMP6]]
    241 float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // Fused multiply-subtract by scalar. The directives above expect b to
          // be negated first (fsub of b from a <-0.0, -0.0> vector), and the
          // negated value, splat(c) and a to be passed in that order to
          // @llvm.fma.v2f64 after bitcast round-trips through <16 x i8>.
    242   return vfmsq_n_f64(a, b, c);
    243 }
    244