Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s
      2 
      3 ;-----------------------------------------------------------------------------
      4 ; RDMA Vector
      5 
      6 declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
      7 declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
      8 declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
      9 declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
     10 
     11 declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>)
     12 declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>)
     13 declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>)
     14 declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
     15 
     16 declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>)
     17 declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>)
     18 declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>)
     19 declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
     20 
     21 define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
     22 ; CHECK-LABEL: test_vqrdmlah_v4i16:
     23    %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs,  <4 x i16> %rhs)
     24    %retval =  call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc,  <4 x i16> %prod)
     25 ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
     26    ret <4 x i16> %retval
     27 }
     28 
     29 define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
     30 ; CHECK-LABEL: test_vqrdmlah_v8i16:
     31    %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
     32    %retval =  call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod)
     33 ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
     34    ret <8 x i16> %retval
     35 }
     36 
     37 define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
     38 ; CHECK-LABEL: test_vqrdmlah_v2i32:
     39    %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
     40    %retval =  call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod)
     41 ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
     42    ret <2 x i32> %retval
     43 }
     44 
     45 define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
     46 ; CHECK-LABEL: test_vqrdmlah_v4i32:
     47    %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
     48    %retval =  call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod)
     49 ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
     50    ret <4 x i32> %retval
     51 }
     52 
     53 define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
     54 ; CHECK-LABEL: test_vqrdmlsh_v4i16:
     55    %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs,  <4 x i16> %rhs)
     56    %retval =  call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod)
     57 ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
     58    ret <4 x i16> %retval
     59 }
     60 
     61 define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
     62 ; CHECK-LABEL: test_vqrdmlsh_v8i16:
     63    %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
     64    %retval =  call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod)
     65 ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
     66    ret <8 x i16> %retval
     67 }
     68 
     69 define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
     70 ; CHECK-LABEL: test_vqrdmlsh_v2i32:
     71    %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
     72    %retval =  call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod)
     73 ; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
     74    ret <2 x i32> %retval
     75 }
     76 
     77 define <4 x i32> @test_vqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
     78 ; CHECK-LABEL: test_vqrdmlsh_v4i32:
     79    %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
     80    %retval =  call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod)
     81 ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
     82    ret <4 x i32> %retval
     83 }
     84 
     85 ;-----------------------------------------------------------------------------
     86 ; RDMA Scalar
     87 
     88 define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
     89 ; CHECK-LABEL: test_vqrdmlah_lane_s16:
     90 entry:
     91   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
     92   %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
     93   %retval =  call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod)
     94 ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3]
     95   ret <4 x i16> %retval
     96 }
     97 
     98 define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
     99 ; CHECK-LABEL: test_vqrdmlahq_lane_s16:
    100 entry:
    101   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    102   %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
    103   %retval =  call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod)
    104 ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2]
    105   ret <8 x i16> %retval
    106 }
    107 
    108 define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
    109 ; CHECK-LABEL: test_vqrdmlah_lane_s32:
    110 entry:
    111   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
    112   %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
    113   %retval =  call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod)
    114 ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1]
    115   ret <2 x i32> %retval
    116 }
    117 
    118 define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) {
    119 ; CHECK-LABEL: test_vqrdmlahq_lane_s32:
    120 entry:
    121   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
    122   %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
    123   %retval =  call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod)
    124 ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0]
    125   ret <4 x i32> %retval
    126 }
    127 
    128 define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
    129 ; CHECK-LABEL: test_vqrdmlsh_lane_s16:
    130 entry:
    131   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
    132   %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
    133   %retval =  call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod)
    134 ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3]
    135   ret <4 x i16> %retval
    136 }
    137 
    138 define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
    139 ; CHECK-LABEL: test_vqrdmlshq_lane_s16:
    140 entry:
    141   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    142   %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
    143   %retval =  call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod)
    144 ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2]
    145   ret <8 x i16> %retval
    146 }
    147 
    148 define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
    149 ; CHECK-LABEL: test_vqrdmlsh_lane_s32:
    150 entry:
    151   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
    152   %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
    153   %retval =  call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod)
    154 ; CHECK: vqrdmlsh.s32  {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1]
    155   ret <2 x i32> %retval
    156 }
    157 
    158 define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) {
    159 ; CHECK-LABEL: test_vqrdmlshq_lane_s32:
    160 entry:
    161   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
    162   %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
    163   %retval =  call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod)
    164 ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0]
    165   ret <4 x i32> %retval
    166 }
    167