// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
// RUN:  -target-feature +v8.1a -S -emit-llvm -o - %s | FileCheck %s

#include <arm_neon.h>

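// This file checks Clang's code generation for the ARMv8.1-A SQRDMLAH and
// SQRDMLSH (signed saturating rounding doubling multiply accumulate/subtract
// returning high half) NEON intrinsics. Each test below verifies that the
// corresponding vqrdmlah*/vqrdmlsh* builtin is lowered to a call to
// @llvm.aarch64.neon.sqrdmulh followed by @llvm.aarch64.neon.sqadd (for the
// accumulate forms) or @llvm.aarch64.neon.sqsub (for the subtract forms).
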
// CHECK-LABEL: test_vqrdmlah_laneq_s16
int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlah_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlah_laneq_s32
int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlah_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlahq_laneq_s16
int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlahq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlahq_laneq_s32
int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlahq_laneq_s32(a, b, v, 3);
}

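// The scalar (h- and s-suffixed) tests below check the expected lowering of
// the scalar forms: the i16 variants widen each operand into lane 0 of a
// <4 x i16> vector, call the v4i16 intrinsics, and extract lane 0 of the
// result, while the i32 variants call the i32 forms of the intrinsics
// directly.
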
// CHECK-LABEL: test_vqrdmlahh_s16
int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_s16(a, b, c);
}

// CHECK-LABEL: test_vqrdmlahs_s32
int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_s32(a, b, c);
}

// CHECK-LABEL: test_vqrdmlahh_lane_s16
int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: test_vqrdmlahs_lane_s32
int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: test_vqrdmlahh_laneq_s16
int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: test_vqrdmlahs_laneq_s32
int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_laneq_s32(a, b, c, 3);
}

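// The vqrdmlsh* tests below mirror the vqrdmlah* tests above; the only
// expected difference is that the accumulate step lowers to
// @llvm.aarch64.neon.sqsub rather than @llvm.aarch64.neon.sqadd.
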
// CHECK-LABEL: test_vqrdmlsh_laneq_s16
int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlsh_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlsh_laneq_s32
int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlsh_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlshq_laneq_s16
int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlshq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: test_vqrdmlshq_laneq_s32
int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlshq_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: test_vqrdmlshh_s16
int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_s16(a, b, c);
}

// CHECK-LABEL: test_vqrdmlshs_s32
int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_s32(a, b, c);
}

// CHECK-LABEL: test_vqrdmlshh_lane_s16
int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: test_vqrdmlshs_lane_s32
int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: test_vqrdmlshh_laneq_s16
int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: test_vqrdmlshs_laneq_s32
int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_laneq_s32(a, b, c, 3);
}