Home | History | Annotate | Download | only in CodeGen
      1 // REQUIRES: aarch64-registered-target
      2 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s
      3 
      4 // Test new aarch64 intrinsics and types
      5 
      6 #include <arm_neon.h>
      7 
      8 float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
          // Codegen test for vmla_n_f32 (multiply-accumulate with a scalar third
          // operand). The active directives expect an indexed fmul on element
          // .s[0] followed by a separate fadd, both on .2s vectors.
          // The CHECK-FMA lines record a dup + fmla form instead; the RUN line
          // visible in this file passes no check-prefix option to FileCheck, so
          // they are presumably inactive -- TODO confirm.
      9   // CHECK-LABEL: test_vmla_n_f32
     10   return vmla_n_f32(a, b, c);
     11   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     12   // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     13   // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     14   // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     15 }
     16 
     17 float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
          // Codegen test for vmlaq_n_f32 (scalar third operand, .4s vectors).
          // Active directives: indexed fmul on element .s[0], then a separate fadd.
          // The CHECK-FMA lines record a dup + fmla alternative; they are
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     18   // CHECK-LABEL: test_vmlaq_n_f32
     19   return vmlaq_n_f32(a, b, c);
     20   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     21   // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     22   // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     23   // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     24 }
     25 
     26 float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // Codegen test for vmlaq_n_f64 (scalar third operand, .2d vectors).
          // Active directives: indexed fmul on element .d[0], then a separate fadd.
          // The CHECK-FMA lines record a dup + fmla alternative; they are
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     27   // CHECK-LABEL: test_vmlaq_n_f64
     28   return vmlaq_n_f64(a, b, c);
     29   // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
     30   // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     31   // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
     32   // CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     33 }
     34 
     35 float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
          // Codegen test for vmlsq_n_f32 (scalar third operand, .4s vectors).
          // Active directives: indexed fmul on element .s[0], then a separate fsub.
          // The CHECK-FMA lines record a dup + fmls alternative; they are
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     36   // CHECK-LABEL: test_vmlsq_n_f32
     37   return vmlsq_n_f32(a, b, c);
     38   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     39   // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     40   // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     41   // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     42 }
     43 
     44 float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
          // Codegen test for vmls_n_f32 (scalar third operand, .2s vectors).
          // Active directives: indexed fmul on element .s[0], then a separate fsub.
          // The CHECK-FMA lines record a dup + fmls alternative; they are
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     45   // CHECK-LABEL: test_vmls_n_f32
     46   return vmls_n_f32(a, b, c);
     47   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     48   // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     49   // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     50   // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     51 }
     52 
     53 float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // Codegen test for vmlsq_n_f64 (scalar third operand, .2d vectors).
          // Active directives: indexed fmul on element .d[0], then a separate fsub.
          // The CHECK-FMA lines record a dup + fmls alternative; they are
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     54   // CHECK-LABEL: test_vmlsq_n_f64
     55   return vmlsq_n_f64(a, b, c);
     56   // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
     57   // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     58   // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
     59   // CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
     60 }
     61 
     62 float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Codegen test for vmla_lane_f32 with constant lane index 0 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fadd (.2s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     63   // CHECK-LABEL: test_vmla_lane_f32_0
     64   return vmla_lane_f32(a, b, v, 0);
     65   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     66   // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     67   // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     68 }
     69 
     70 float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Codegen test for vmlaq_lane_f32 with constant lane index 0 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fadd (.4s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     71   // CHECK-LABEL: test_vmlaq_lane_f32_0
     72   return vmlaq_lane_f32(a, b, v, 0);
     73   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     74   // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     75   // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     76 }
     77 
     78 float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Codegen test for vmla_laneq_f32 with constant lane index 0 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fadd (.2s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     79   // CHECK-LABEL: test_vmla_laneq_f32_0
     80   return vmla_laneq_f32(a, b, v, 0);
     81   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     82   // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     83   // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     84 }
     85 
     86 float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Codegen test for vmlaq_laneq_f32 with constant lane index 0 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fadd (.4s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     87   // CHECK-LABEL: test_vmlaq_laneq_f32_0
     88   return vmlaq_laneq_f32(a, b, v, 0);
     89   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     90   // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
     91   // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
     92 }
     93 
     94 float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Codegen test for vmls_lane_f32 with constant lane index 0 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fsub (.2s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
     95   // CHECK-LABEL: test_vmls_lane_f32_0
     96   return vmls_lane_f32(a, b, v, 0);
     97   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
     98   // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     99   // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
    100 }
    101 
    102 float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Codegen test for vmlsq_lane_f32 with constant lane index 0 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fsub (.4s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    103   // CHECK-LABEL: test_vmlsq_lane_f32_0
    104   return vmlsq_lane_f32(a, b, v, 0);
    105   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
    106   // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    107   // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
    108 }
    109 
    110 float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Codegen test for vmls_laneq_f32 with constant lane index 0 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fsub (.2s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    111   // CHECK-LABEL: test_vmls_laneq_f32_0
    112   return vmls_laneq_f32(a, b, v, 0);
    113   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
    114   // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    115   // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
    116 }
    117 
    118 float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Codegen test for vmlsq_laneq_f32 with constant lane index 0 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[0], then a separate fsub (.4s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    119   // CHECK-LABEL: test_vmlsq_laneq_f32_0
    120   return vmlsq_laneq_f32(a, b, v, 0);
    121   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
    122   // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    123   // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
    124 }
    125 
    126 float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Codegen test for vmla_lane_f32 with constant lane index 1 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[1], then a separate fadd (.2s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    127   // CHECK-LABEL: test_vmla_lane_f32
    128   return vmla_lane_f32(a, b, v, 1);
    129   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    130   // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    131   // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    132 }
    133 
    134 float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Codegen test for vmlaq_lane_f32 with constant lane index 1 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[1], then a separate fadd (.4s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    135   // CHECK-LABEL: test_vmlaq_lane_f32
    136   return vmlaq_lane_f32(a, b, v, 1);
    137   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    138   // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    139   // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    140 }
    141 
    142 float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Codegen test for vmla_laneq_f32 with constant lane index 3 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[3], then a separate fadd (.2s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    143   // CHECK-LABEL: test_vmla_laneq_f32
    144   return vmla_laneq_f32(a, b, v, 3);
    145   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
    146   // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    147   // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
    148 }
    149 
    150 float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Codegen test for vmlaq_laneq_f32 with constant lane index 3 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[3], then a separate fadd (.4s vectors).
          // The CHECK-FMA line records a single indexed fmla alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    151   // CHECK-LABEL: test_vmlaq_laneq_f32
    152   return vmlaq_laneq_f32(a, b, v, 3);
    153   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
    154   // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    155   // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
    156 }
    157 
    158 float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
          // Codegen test for vmls_lane_f32 with constant lane index 1 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[1], then a separate fsub (.2s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    159   // CHECK-LABEL: test_vmls_lane_f32
    160   return vmls_lane_f32(a, b, v, 1);
    161   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    162   // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    163   // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    164 }
    165 
    166 float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
          // Codegen test for vmlsq_lane_f32 with constant lane index 1 taken from
          // the float32x2_t argument v. Active directives: indexed fmul on
          // element .s[1], then a separate fsub (.4s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    167   // CHECK-LABEL: test_vmlsq_lane_f32
    168   return vmlsq_lane_f32(a, b, v, 1);
    169   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    170   // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    171   // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    172 }
    173 float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
          // Codegen test for vmls_laneq_f32 with constant lane index 3 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[3], then a separate fsub (.2s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    174   // CHECK-LABEL: test_vmls_laneq_f32
    175   return vmls_laneq_f32(a, b, v, 3);
    176   // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
    177   // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    178   // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
    179 }
    180 
    181 float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
          // Codegen test for vmlsq_laneq_f32 with constant lane index 3 taken from
          // the float32x4_t argument v. Active directives: indexed fmul on
          // element .s[3], then a separate fsub (.4s vectors).
          // The CHECK-FMA line records a single indexed fmls alternative;
          // presumably inactive under the default FileCheck prefix -- TODO confirm.
    182   // CHECK-LABEL: test_vmlsq_laneq_f32
    183   return vmlsq_laneq_f32(a, b, v, 3);
    184   // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
    185   // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    186   // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
    187 }
    188 
    189 float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // Codegen test for vfmaq_n_f64 (scalar third operand, .2d vectors).
          // A single fmla is expected; the pattern for its last operand accepts
          // either a whole .2d vector register or the indexed element .d[0].
          // The label directive in this test includes the trailing colon.
    190   // CHECK-LABEL: test_vfmaq_n_f64:
    191   return vfmaq_n_f64(a, b, c);
    192   // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}}
    193 }
    194 
    195 float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
          // Codegen test for vfmsq_n_f64 (scalar third operand, .2d vectors).
          // A single fmls is expected; the pattern for its last operand accepts
          // either a whole .2d vector register or the indexed element .d[0].
          // The label directive in this test includes the trailing colon.
    196   // CHECK-LABEL: test_vfmsq_n_f64:
    197   return vfmsq_n_f64(a, b, c);
    198   // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+\.2d|v[0-9]+\.d\[0\]}}
    199 }
    200