; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
; RUN:     < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
      3 
      4 define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
      5 ; CHECK-LABEL: test_vmull_high_n_s16:
      6 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
      7 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
      8 ; CHECK-NEXT: ret
      9 entry:
     10   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     11   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
     12   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
     13   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
     14   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
     15   %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
     16   ret <4 x i32> %vmull15.i.i
     17 }
     18 
     19 define <4 x i32> @test_vmull_high_n_s16_imm(<8 x i16> %a) #0 {
     20 ; CHECK-LABEL: test_vmull_high_n_s16_imm:
     21 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
     22 ; CHECK-NEXT: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
     23 ; CHECK-NEXT: ret
     24 entry:
     25   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     26   %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
     27   ret <4 x i32> %vmull15.i.i
     28 }
     29 
     30 define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
     31 ; CHECK-LABEL: test_vmull_high_n_s32:
     32 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
     33 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
     34 ; CHECK-NEXT: ret
     35 entry:
     36   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     37   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
     38   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
     39   %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
     40   ret <2 x i64> %vmull9.i.i
     41 }
     42 
     43 define <2 x i64> @test_vmull_high_n_s32_imm(<4 x i32> %a) #0 {
     44 ; CHECK-LABEL: test_vmull_high_n_s32_imm:
     45 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #1, msl #8
     46 ; CHECK-NEXT: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
     47 ; CHECK-NEXT: ret
     48 entry:
     49   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     50   %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 511, i32 511>)
     51   ret <2 x i64> %vmull9.i.i
     52 }
     53 
     54 define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) #0 {
     55 ; CHECK-LABEL: test_vmull_high_n_u16:
     56 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
     57 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
     58 ; CHECK-NEXT: ret
     59 entry:
     60   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     61   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
     62   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
     63   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
     64   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
     65   %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
     66   ret <4 x i32> %vmull15.i.i
     67 }
     68 
     69 define <4 x i32> @test_vmull_high_n_u16_imm(<8 x i16> %a) #0 {
     70 ; CHECK-LABEL: test_vmull_high_n_u16_imm:
     71 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #17, lsl #8
     72 ; CHECK-NEXT: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
     73 ; CHECK-NEXT: ret
     74 entry:
     75   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     76   %vmull15.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 4352, i16 4352, i16 4352, i16 4352>)
     77   ret <4 x i32> %vmull15.i.i
     78 }
     79 
     80 define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) #0 {
     81 ; CHECK-LABEL: test_vmull_high_n_u32:
     82 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
     83 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
     84 ; CHECK-NEXT: ret
     85 entry:
     86   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     87   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
     88   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
     89   %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
     90   ret <2 x i64> %vmull9.i.i
     91 }
     92 
     93 define <2 x i64> @test_vmull_high_n_u32_imm(<4 x i32> %a) #0 {
     94 ; CHECK-LABEL: test_vmull_high_n_u32_imm:
     95 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].4s, #1, msl #8
     96 ; CHECK-NEXT: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
     97 ; CHECK-NEXT: ret
     98 entry:
     99   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    100   %vmull9.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 4294966784, i32 4294966784>)
    101   ret <2 x i64> %vmull9.i.i
    102 }
    103 
    104 define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) #0 {
    105 ; CHECK-LABEL: test_vqdmull_high_n_s16:
    106 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
    107 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    108 ; CHECK-NEXT: ret
    109 entry:
    110   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    111   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
    112   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
    113   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
    114   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
    115   %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    116   ret <4 x i32> %vqdmull15.i.i
    117 }
    118 
    119 define <4 x i32> @test_vqdmull_high_n_s16_imm(<8 x i16> %a) #0 {
    120 ; CHECK-LABEL: test_vqdmull_high_n_s16_imm:
    121 ; CHECK-NEXT: mvni [[REPLICATE:v[0-9]+]].8h, #17, lsl #8
    122 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    123 ; CHECK-NEXT: ret
    124 entry:
    125   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    126   %vqdmull15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 61183, i16 61183, i16 61183, i16 61183>)
    127   ret <4 x i32> %vqdmull15.i.i
    128 }
    129 
    130 define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) #0 {
    131 ; CHECK-LABEL: test_vqdmull_high_n_s32:
    132 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
    133 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    134 ; CHECK-NEXT: ret
    135 entry:
    136   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    137   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
    138   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
    139   %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    140   ret <2 x i64> %vqdmull9.i.i
    141 }
    142 
    143 define <2 x i64> @test_vqdmull_high_n_s32_imm(<4 x i32> %a) #0 {
    144 ; CHECK-LABEL: test_vqdmull_high_n_s32_imm:
    145 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
    146 ; CHECK-NEXT: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    147 ; CHECK-NEXT: ret
    148 entry:
    149   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    150   %vqdmull9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
    151   ret <2 x i64> %vqdmull9.i.i
    152 }
    153 
    154 define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
    155 ; CHECK-LABEL: test_vmlal_high_n_s16:
    156 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
    157 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    158 ; CHECK-NEXT: ret
    159 entry:
    160   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    161   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    162   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    163   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    164   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    165   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    166   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
    167   ret <4 x i32> %add.i.i
    168 }
    169 
    170 define <4 x i32> @test_vmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
    171 ; CHECK-LABEL: test_vmlal_high_n_s16_imm:
    172 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
    173 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    174 ; CHECK-NEXT: ret
    175 entry:
    176   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    177   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
    178   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
    179   ret <4 x i32> %add.i.i
    180 }
    181 
    182 define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
    183 ; CHECK-LABEL: test_vmlal_high_n_s32:
    184 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
    185 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    186 ; CHECK-NEXT: ret
    187 entry:
    188   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    189   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    190   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    191   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    192   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
    193   ret <2 x i64> %add.i.i
    194 }
    195 
    196 define <2 x i64> @test_vmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
    197 ; CHECK-LABEL: test_vmlal_high_n_s32_imm:
    198 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
    199 ; CHECK-NEXT: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    200 ; CHECK-NEXT: ret
    201 entry:
    202   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    203   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
    204   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
    205   ret <2 x i64> %add.i.i
    206 }
    207 
    208 define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
    209 ; CHECK-LABEL: test_vmlal_high_n_u16:
    210 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
    211 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    212 ; CHECK-NEXT: ret
    213 entry:
    214   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    215   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    216   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    217   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    218   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    219   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    220   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
    221   ret <4 x i32> %add.i.i
    222 }
    223 
    224 define <4 x i32> @test_vmlal_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
    225 ; CHECK-LABEL: test_vmlal_high_n_u16_imm:
    226 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
    227 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    228 ; CHECK-NEXT: ret
    229 entry:
    230   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    231   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
    232   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
    233   ret <4 x i32> %add.i.i
    234 }
    235 
    236 define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
    237 ; CHECK-LABEL: test_vmlal_high_n_u32:
    238 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
    239 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    240 ; CHECK-NEXT: ret
    241 entry:
    242   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    243   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    244   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    245   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    246   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
    247   ret <2 x i64> %add.i.i
    248 }
    249 
    250 define <2 x i64> @test_vmlal_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
    251 ; CHECK-LABEL: test_vmlal_high_n_u32_imm:
    252 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
    253 ; CHECK-NEXT: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    254 ; CHECK-NEXT: ret
    255 entry:
    256   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    257   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
    258   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
    259   ret <2 x i64> %add.i.i
    260 }
    261 
    262 define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
    263 ; CHECK-LABEL: test_vqdmlal_high_n_s16:
    264 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
    265 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    266 ; CHECK-NEXT: ret
    267 entry:
    268   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    269   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    270   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    271   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    272   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    273   %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    274   %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
    275   ret <4 x i32> %vqdmlal17.i.i
    276 }
    277 
    278 define <4 x i32> @test_vqdmlal_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
    279 ; CHECK-LABEL: test_vqdmlal_high_n_s16_imm:
    280 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
    281 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    282 ; CHECK-NEXT: ret
    283 entry:
    284   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    285   %vqdmlal15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
    286   %vqdmlal17.i.i = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
    287   ret <4 x i32> %vqdmlal17.i.i
    288 }
    289 
    290 define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
    291 ; CHECK-LABEL: test_vqdmlal_high_n_s32:
    292 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
    293 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    294 ; CHECK-NEXT: ret
    295 entry:
    296   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    297   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    298   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    299   %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    300   %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
    301   ret <2 x i64> %vqdmlal11.i.i
    302 }
    303 
    304 define <2 x i64> @test_vqdmlal_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
    305 ; CHECK-LABEL: test_vqdmlal_high_n_s32_imm:
    306 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
    307 ; CHECK-NEXT: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    308 ; CHECK-NEXT: ret
    309 entry:
    310   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    311   %vqdmlal9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
    312   %vqdmlal11.i.i = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
    313   ret <2 x i64> %vqdmlal11.i.i
    314 }
    315 
    316 define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
    317 ; CHECK-LABEL: test_vmlsl_high_n_s16:
    318 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
    319 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    320 ; CHECK-NEXT: ret
    321 entry:
    322   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    323   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    324   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    325   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    326   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    327   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    328   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
    329   ret <4 x i32> %sub.i.i
    330 }
    331 
    332 define <4 x i32> @test_vmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
    333 ; CHECK-LABEL: test_vmlsl_high_n_s16_imm:
    334 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
    335 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    336 ; CHECK-NEXT: ret
    337 entry:
    338   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    339   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
    340   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
    341   ret <4 x i32> %sub.i.i
    342 }
    343 
    344 define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
    345 ; CHECK-LABEL: test_vmlsl_high_n_s32:
    346 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
    347 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    348 ; CHECK-NEXT: ret
    349 entry:
    350   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    351   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    352   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    353   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    354   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
    355   ret <2 x i64> %sub.i.i
    356 }
    357 
    358 define <2 x i64> @test_vmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
    359 ; CHECK-LABEL: test_vmlsl_high_n_s32_imm:
    360 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
    361 ; CHECK-NEXT: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    362 ; CHECK-NEXT: ret
    363 entry:
    364   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    365   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
    366   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
    367   ret <2 x i64> %sub.i.i
    368 }
    369 
    370 define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
    371 ; CHECK-LABEL: test_vmlsl_high_n_u16:
    372 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
    373 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    374 ; CHECK-NEXT: ret
    375 entry:
    376   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    377   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    378   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    379   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    380   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    381   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    382   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
    383   ret <4 x i32> %sub.i.i
    384 }
    385 
    386 define <4 x i32> @test_vmlsl_high_n_u16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
    387 ; CHECK-LABEL: test_vmlsl_high_n_u16_imm:
    388 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
    389 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    390 ; CHECK-NEXT: ret
    391 entry:
    392   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    393   %vmull2.i.i.i = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
    394   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
    395   ret <4 x i32> %sub.i.i
    396 }
    397 
    398 define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
    399 ; CHECK-LABEL: test_vmlsl_high_n_u32:
    400 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
    401 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    402 ; CHECK-NEXT: ret
    403 entry:
    404   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    405   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    406   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    407   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    408   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
    409   ret <2 x i64> %sub.i.i
    410 }
    411 
    412 define <2 x i64> @test_vmlsl_high_n_u32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
    413 ; CHECK-LABEL: test_vmlsl_high_n_u32_imm:
    414 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
    415 ; CHECK-NEXT: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    416 ; CHECK-NEXT: ret
    417 entry:
    418   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    419   %vmull2.i.i.i = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
    420   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
    421   ret <2 x i64> %sub.i.i
    422 }
    423 
    424 define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) #0 {
    425 ; CHECK-LABEL: test_vqdmlsl_high_n_s16:
    426 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].8h, w0
    427 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    428 ; CHECK-NEXT: ret
    429 entry:
    430   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    431   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    432   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    433   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    434   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    435   %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    436   %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
    437   ret <4 x i32> %vqdmlsl17.i.i
    438 }
    439 
    440 define <4 x i32> @test_vqdmlsl_high_n_s16_imm(<4 x i32> %a, <8 x i16> %b) #0 {
    441 ; CHECK-LABEL: test_vqdmlsl_high_n_s16_imm:
    442 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].8h, #29
    443 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    444 ; CHECK-NEXT: ret
    445 entry:
    446   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    447   %vqdmlsl15.i.i = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> <i16 29, i16 29, i16 29, i16 29>)
    448   %vqdmlsl17.i.i = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
    449   ret <4 x i32> %vqdmlsl17.i.i
    450 }
    451 
    452 define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) #0 {
    453 ; CHECK-LABEL: test_vqdmlsl_high_n_s32:
    454 ; CHECK-NEXT: dup [[REPLICATE:v[0-9]+]].4s, w0
    455 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    456 ; CHECK-NEXT: ret
    457 entry:
    458   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    459   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    460   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    461   %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    462   %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
    463   ret <2 x i64> %vqdmlsl11.i.i
    464 }
    465 
    466 define <2 x i64> @test_vqdmlsl_high_n_s32_imm(<2 x i64> %a, <4 x i32> %b) #0 {
    467 ; CHECK-LABEL: test_vqdmlsl_high_n_s32_imm:
    468 ; CHECK-NEXT: movi [[REPLICATE:v[0-9]+]].4s, #29
    469 ; CHECK-NEXT: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    470 ; CHECK-NEXT: ret
    471 entry:
    472   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    473   %vqdmlsl9.i.i = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> <i32 29, i32 29>)
    474   %vqdmlsl11.i.i = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
    475   ret <2 x i64> %vqdmlsl11.i.i
    476 }
    477 
    478 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
    479 ; CHECK-LABEL: test_vmul_n_f32:
    480 ; CHECK-NEXT: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
    481 ; CHECK-NEXT: ret
    482 entry:
    483   %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
    484   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
    485   %mul.i = fmul <2 x float> %vecinit1.i, %a
    486   ret <2 x float> %mul.i
    487 }
    488 
    489 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
    490 ; CHECK-LABEL: test_vmulq_n_f32:
    491 ; CHECK-NEXT: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
    492 ; CHECK-NEXT: ret
    493 entry:
    494   %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
    495   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
    496   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
    497   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
    498   %mul.i = fmul <4 x float> %vecinit3.i, %a
    499   ret <4 x float> %mul.i
    500 }
    501 
    502 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 {
    503 ; CHECK-LABEL: test_vmulq_n_f64:
    504 ; CHECK-NEXT: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    505 ; CHECK-NEXT: ret
    506 entry:
    507   %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
    508   %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
    509   %mul.i = fmul <2 x double> %vecinit1.i, %a
    510   ret <2 x double> %mul.i
    511 }
    512 
    513 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
    514 ; CHECK-LABEL: test_vfma_n_f32:
    515 ; CHECK-NEXT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
    516 ; CHECK-NEXT: ret
    517 entry:
    518   %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
    519   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
    520   %0 = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
    521   ret <2 x float> %0
    522 }
    523 
    524 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
    525 ; CHECK-LABEL: test_vfmaq_n_f32:
    526 ; CHECK-NEXT: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
    527 ; CHECK-NEXT: ret
    528 entry:
    529   %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
    530   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
    531   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
    532   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
    533   %0 = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
    534   ret <4 x float> %0
    535 }
    536 
    537 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
    538 ; CHECK-LABEL: test_vfms_n_f32:
    539 ; CHECK-NEXT: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
    540 ; CHECK-NEXT: ret
    541 entry:
    542   %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
    543   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
    544   %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
    545   %1 = call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
    546   ret <2 x float> %1
    547 }
    548 
    549 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
    550 ; CHECK-LABEL: test_vfmsq_n_f32:
    551 ; CHECK-NEXT: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
    552 ; CHECK-NEXT: ret
    553 entry:
    554   %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
    555   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
    556   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
    557   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
    558   %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    559   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
    560   ret <4 x float> %1
    561 }
    562 
    563 attributes #0 = { nounwind }
    564 
    565 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
    566 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
    567 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
    568 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
    569 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
    570 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
    571 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
    572 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
    573 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
    574 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
    575 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
    576 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
    577