Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
      2 
      3 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
      4 
      5 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
      6 
      7 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
      8 
      9 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
     10 
     11 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
     12 
     13 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
     14 
     15 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
     16 
     17 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
     18 
     19 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
     20 
     21 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
     22 
     23 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
     24 
     25 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
     26 
     27 define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
     28 ; CHECK-LABEL: test_vmull_high_n_s16:
     29 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
     30 ; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
     31 entry:
     32   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     33   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
     34   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
     35   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
     36   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
     37   %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
     38   ret <4 x i32> %vmull15.i.i
     39 }
     40 
     41 define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
     42 ; CHECK-LABEL: test_vmull_high_n_s32:
     43 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
     44 ; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
     45 entry:
     46   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     47   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
     48   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
     49   %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
     50   ret <2 x i64> %vmull9.i.i
     51 }
     52 
     53 define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
     54 ; CHECK-LABEL: test_vmull_high_n_u16:
     55 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
     56 ; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
     57 entry:
     58   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     59   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
     60   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
     61   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
     62   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
     63   %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
     64   ret <4 x i32> %vmull15.i.i
     65 }
     66 
     67 define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
     68 ; CHECK-LABEL: test_vmull_high_n_u32:
     69 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
     70 ; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
     71 entry:
     72   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     73   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
     74   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
     75   %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
     76   ret <2 x i64> %vmull9.i.i
     77 }
     78 
     79 define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
     80 ; CHECK-LABEL: test_vqdmull_high_n_s16:
     81 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
     82 ; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
     83 entry:
     84   %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     85   %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
     86   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
     87   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
     88   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
     89   %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
     90   ret <4 x i32> %vqdmull15.i.i
     91 }
     92 
     93 define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
     94 ; CHECK-LABEL: test_vqdmull_high_n_s32:
     95 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
     96 ; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
     97 entry:
     98   %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     99   %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
    100   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
    101   %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    102   ret <2 x i64> %vqdmull9.i.i
    103 }
    104 
    105 define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
    106 ; CHECK-LABEL: test_vmlal_high_n_s16:
    107 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
    108 ; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    109 entry:
    110   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    111   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    112   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    113   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    114   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    115   %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    116   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
    117   ret <4 x i32> %add.i.i
    118 }
    119 
    120 define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
    121 ; CHECK-LABEL: test_vmlal_high_n_s32:
    122 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
    123 ; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    124 entry:
    125   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    126   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    127   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    128   %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    129   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
    130   ret <2 x i64> %add.i.i
    131 }
    132 
    133 define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
    134 ; CHECK-LABEL: test_vmlal_high_n_u16:
    135 ; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
    136 ; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
    137 entry:
    138   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    139   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    140   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    141   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    142   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    143   %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    144   %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
    145   ret <4 x i32> %add.i.i
    146 }
    147 
    148 define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
    149 ; CHECK-LABEL: test_vmlal_high_n_u32:
    150 ; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
    151 ; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
    152 entry:
    153   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    154   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    155   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    156   %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    157   %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
    158   ret <2 x i64> %add.i.i
    159 }
    160 
    161 define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
    162 ; CHECK-LABEL: test_vqdmlal_high_n_s16:
    163 ; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    164 entry:
    165   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    166   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    167   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    168   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    169   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    170   %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    171   %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
    172   ret <4 x i32> %vqdmlal17.i.i
    173 }
    174 
    175 define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
    176 ; CHECK-LABEL: test_vqdmlal_high_n_s32:
    177 ; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    178 entry:
    179   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    180   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    181   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    182   %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    183   %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
    184   ret <2 x i64> %vqdmlal11.i.i
    185 }
    186 
    187 define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
    188 ; CHECK-LABEL: test_vmlsl_high_n_s16:
    189 ; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    190 entry:
    191   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    192   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    193   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    194   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    195   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    196   %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    197   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
    198   ret <4 x i32> %sub.i.i
    199 }
    200 
    201 define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
    202 ; CHECK-LABEL: test_vmlsl_high_n_s32:
    203 ; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    204 entry:
    205   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    206   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    207   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    208   %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    209   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
    210   ret <2 x i64> %sub.i.i
    211 }
    212 
    213 define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
    214 ; CHECK-LABEL: test_vmlsl_high_n_u16:
    215 ; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    216 entry:
    217   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    218   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    219   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    220   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    221   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    222   %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    223   %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
    224   ret <4 x i32> %sub.i.i
    225 }
    226 
    227 define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
    228 ; CHECK-LABEL: test_vmlsl_high_n_u32:
    229 ; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    230 entry:
    231   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    232   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    233   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    234   %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    235   %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
    236   ret <2 x i64> %sub.i.i
    237 }
    238 
    239 define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
    240 ; CHECK-LABEL: test_vqdmlsl_high_n_s16:
    241 ; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
    242 entry:
    243   %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    244   %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
    245   %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
    246   %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
    247   %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
    248   %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
    249   %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
    250   ret <4 x i32> %vqdmlsl17.i.i
    251 }
    252 
    253 define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
    254 ; CHECK-LABEL: test_vqdmlsl_high_n_s32:
    255 ; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    256 entry:
    257   %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    258   %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
    259   %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
    260   %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
    261   %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
    262   ret <2 x i64> %vqdmlsl11.i.i
    263 }
    264 
    265 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
    266 ; CHECK-LABEL: test_vmul_n_f32:
    267 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
    268 entry:
    269   %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
    270   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
    271   %mul.i = fmul <2 x float> %vecinit1.i, %a
    272   ret <2 x float> %mul.i
    273 }
    274 
    275 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
    276 ; CHECK-LABEL: test_vmulq_n_f32:
    277 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
    278 entry:
    279   %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
    280   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
    281   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
    282   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
    283   %mul.i = fmul <4 x float> %vecinit3.i, %a
    284   ret <4 x float> %mul.i
    285 }
    286 
    287 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
    288 ; CHECK-LABEL: test_vmulq_n_f64:
    289 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    290 entry:
    291   %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
    292   %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
    293   %mul.i = fmul <2 x double> %vecinit1.i, %a
    294   ret <2 x double> %mul.i
    295 }
    296 
    297 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
    298 ; CHECK-LABEL: test_vfma_n_f32:
    299 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
    300 entry:
    301   %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
    302   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
    303   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
    304   ret <2 x float> %0
    305 }
    306 
    307 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
    308 ; CHECK-LABEL: test_vfmaq_n_f32:
    309 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
    310 entry:
    311   %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
    312   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
    313   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
    314   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
    315   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
    316   ret <4 x float> %0
    317 }
    318 
    319 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
    320 ; CHECK-LABEL: test_vfms_n_f32:
    321 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
    322 entry:
    323   %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
    324   %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
    325   %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
    326   %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
    327   ret <2 x float> %1
    328 }
    329 
    330 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
    331 ; CHECK-LABEL: test_vfmsq_n_f32:
    332 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
    333 entry:
    334   %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
    335   %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
    336   %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
    337   %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
    338   %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    339   %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
    340   ret <4 x float> %1
    341 }
    342