      1 // RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
      2 // Test ARM64 SIMD fused multiply add intrinsics
      3 
      4 #include <arm_neon.h>
      5 
// Check that vfma_f32 lowers to the llvm.fma.v2f32 intrinsic with the
// accumulator (a1) moved to the last operand position.
float32x2_t test_vfma_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_f32
  return vfma_f32(a1, a2, a3);
  // CHECK: llvm.fma.v2f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}
     12 
// Check that vfmaq_f32 lowers to the llvm.fma.v4f32 intrinsic with the
// accumulator (a1) moved to the last operand position.
float32x4_t test_vfmaq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmaq_f32
  return vfmaq_f32(a1, a2, a3);
  // CHECK: llvm.fma.v4f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}
     19 
// Check that vfmaq_f64 lowers to the llvm.fma.v2f64 intrinsic with the
// accumulator (a1) moved to the last operand position.
float64x2_t test_vfmaq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmaq_f64
  return vfmaq_f64(a1, a2, a3);
  // CHECK: llvm.fma.v2f64({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}
     26 
// Check that vfma_lane_f32 lowers to llvm.fma.v2f32 where the multiplicand
// is some lane-selected value and the accumulator (a1) is the last operand.
float32x2_t test_vfma_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_lane_f32
  return vfma_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v2f32(<2 x float> %a2, <2 x float> {{.*}}, <2 x float> %a1)
  // CHECK-NEXT: ret
}
     35 
// Check that vfmaq_lane_f32 lowers to llvm.fma.v4f32 where the multiplicand
// is some lane-selected value and the accumulator (a1) is the last operand.
float32x4_t test_vfmaq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmaq_lane_f32
  return vfmaq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v4f32(<4 x float> %a2, <4 x float> {{.*}}, <4 x float> %a1)
  // CHECK-NEXT: ret
}
     44 
// Check that vfmaq_lane_f64 lowers to llvm.fma.v2f64 where the multiplicand
// is some lane-selected value and the accumulator (a1) is the last operand.
float64x2_t test_vfmaq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmaq_lane_f64
  return vfmaq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: llvm.fma.v2f64(<2 x double> %a2, <2 x double> {{.*}}, <2 x double> %a1)
  // CHECK-NEXT: ret
}
     53 
// Check that vfma_n_f32 (scalar multiplicand splatted across lanes)
// still lowers to the llvm.fma.v2f32 intrinsic.
float32x2_t test_vfma_n_f32(float32x2_t a1, float32x2_t a2, float32_t a3) {
  // CHECK: test_vfma_n_f32
  return vfma_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f32
  // CHECK-NEXT: ret
}
     62 
// Check that vfmaq_n_f32 (scalar multiplicand splatted across lanes)
// still lowers to the llvm.fma.v4f32 intrinsic.
float32x4_t test_vfmaq_n_f32(float32x4_t a1, float32x4_t a2, float32_t a3) {
  // CHECK: test_vfmaq_n_f32
  return vfmaq_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually four insertelements)
  // CHECK: llvm.fma.v4f32
  // CHECK-NEXT: ret
}
     71 
// Check that vfmaq_n_f64 (scalar multiplicand splatted across lanes)
// still lowers to the llvm.fma.v2f64 intrinsic.
float64x2_t test_vfmaq_n_f64(float64x2_t a1, float64x2_t a2, float64_t a3) {
  // CHECK: test_vfmaq_n_f64
  return vfmaq_n_f64(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f64
  // CHECK-NEXT: ret
}
     80 
// Check that vfms_f32 lowers to llvm.fma.v2f32 with one multiplicand
// negated via an fsub (fused multiply-subtract).
float32x2_t test_vfms_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_f32
  return vfms_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v2f32(<2 x float> %a3, <2 x float> [[NEG]], <2 x float> %a1)
  // CHECK-NEXT: ret
}
     88 
// Check that vfmsq_f32 lowers to llvm.fma.v4f32 with one multiplicand
// negated via an fsub (fused multiply-subtract).
float32x4_t test_vfmsq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmsq_f32
  return vfmsq_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <4 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v4f32(<4 x float> %a3, <4 x float> [[NEG]], <4 x float> %a1)
  // CHECK-NEXT: ret
}
     96 
// Check that vfmsq_f64 lowers to llvm.fma.v2f64 with one multiplicand
// negated via an fsub (fused multiply-subtract).
float64x2_t test_vfmsq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmsq_f64
  return vfmsq_f64(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x double> {{.*}}, %a2
  // CHECK: llvm.fma.v2f64(<2 x double> %a3, <2 x double> [[NEG]], <2 x double> %a1)
  // CHECK-NEXT: ret
}
    104 
// Check that vfms_lane_f32 negates the lane operand (fsub) before the
// lane select, and feeds the result into llvm.fma.v2f32.
float32x2_t test_vfms_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_lane_f32
  return vfms_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v2f32(<2 x float> {{.*}}, <2 x float> [[LANE]], <2 x float> %a1)
  // CHECK-NEXT: ret
}
    115 
// Check that vfmsq_lane_f32 negates the lane operand (fsub) before the
// lane select, and feeds the result into llvm.fma.v4f32.
float32x4_t test_vfmsq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmsq_lane_f32
  return vfmsq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v4f32(<4 x float> {{.*}}, <4 x float> [[LANE]], <4 x float> %a1)
  // CHECK-NEXT: ret
}
    126 
// Check that vfmsq_lane_f64 negates the lane operand (fsub) before the
// lane select, and feeds the result into llvm.fma.v2f64.
float64x2_t test_vfmsq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmsq_lane_f64
  return vfmsq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <1 x double> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[NEG]]
  // CHECK: llvm.fma.v2f64(<2 x double> {{.*}}, <2 x double> [[LANE]], <2 x double> %a1)
  // CHECK-NEXT: ret
}
    137