; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s


      4 define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
      5 ;CHECK-LABEL: sabdl8h:
      6 ;CHECK: sabdl.8h
      7         %tmp1 = load <8 x i8>* %A
      8         %tmp2 = load <8 x i8>* %B
      9         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
     10         %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
     11         ret <8 x i16> %tmp4
     12 }
     13 
     14 define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     15 ;CHECK-LABEL: sabdl4s:
     16 ;CHECK: sabdl.4s
     17         %tmp1 = load <4 x i16>* %A
     18         %tmp2 = load <4 x i16>* %B
     19         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
     20         %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
     21         ret <4 x i32> %tmp4
     22 }
     23 
     24 define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     25 ;CHECK-LABEL: sabdl2d:
     26 ;CHECK: sabdl.2d
     27         %tmp1 = load <2 x i32>* %A
     28         %tmp2 = load <2 x i32>* %B
     29         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
     30         %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
     31         ret <2 x i64> %tmp4
     32 }
     33 
     34 define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     35 ;CHECK-LABEL: sabdl2_8h:
     36 ;CHECK: sabdl2.8h
     37         %load1 = load <16 x i8>* %A
     38         %load2 = load <16 x i8>* %B
     39         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
     40         %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
     41         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
     42         %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
     43         ret <8 x i16> %tmp4
     44 }
     45 
     46 define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
     47 ;CHECK-LABEL: sabdl2_4s:
     48 ;CHECK: sabdl2.4s
     49         %load1 = load <8 x i16>* %A
     50         %load2 = load <8 x i16>* %B
     51         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     52         %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     53         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
     54         %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
     55         ret <4 x i32> %tmp4
     56 }
     57 
     58 define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
     59 ;CHECK-LABEL: sabdl2_2d:
     60 ;CHECK: sabdl2.2d
     61         %load1 = load <4 x i32>* %A
     62         %load2 = load <4 x i32>* %B
     63         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     64         %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
     65         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
     66         %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
     67         ret <2 x i64> %tmp4
     68 }
     69 
     70 define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
     71 ;CHECK-LABEL: uabdl8h:
     72 ;CHECK: uabdl.8h
     73   %tmp1 = load <8 x i8>* %A
     74   %tmp2 = load <8 x i8>* %B
     75   %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
     76   %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
     77   ret <8 x i16> %tmp4
     78 }
     79 
     80 define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     81 ;CHECK-LABEL: uabdl4s:
     82 ;CHECK: uabdl.4s
     83   %tmp1 = load <4 x i16>* %A
     84   %tmp2 = load <4 x i16>* %B
     85   %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
     86   %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
     87   ret <4 x i32> %tmp4
     88 }
     89 
     90 define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     91 ;CHECK-LABEL: uabdl2d:
     92 ;CHECK: uabdl.2d
     93   %tmp1 = load <2 x i32>* %A
     94   %tmp2 = load <2 x i32>* %B
     95   %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
     96   %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
     97   ret <2 x i64> %tmp4
     98 }
     99 
    100 define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    101 ;CHECK-LABEL: uabdl2_8h:
    102 ;CHECK: uabdl2.8h
    103   %load1 = load <16 x i8>* %A
    104   %load2 = load <16 x i8>* %B
    105   %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    106   %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    107 
    108   %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    109   %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
    110   ret <8 x i16> %tmp4
    111 }
    112 
    113 define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    114 ;CHECK-LABEL: uabdl2_4s:
    115 ;CHECK: uabdl2.4s
    116   %load1 = load <8 x i16>* %A
    117   %load2 = load <8 x i16>* %B
    118   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    119   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    120   %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    121   %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
    122   ret <4 x i32> %tmp4
    123 }
    124 
    125 define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    126 ;CHECK-LABEL: uabdl2_2d:
    127 ;CHECK: uabdl2.2d
    128   %load1 = load <4 x i32>* %A
    129   %load2 = load <4 x i32>* %B
    130   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    131   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    132   %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    133   %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
    134   ret <2 x i64> %tmp4
    135 }
    136 
    137 define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
    138 ;CHECK-LABEL: fabd_2s:
    139 ;CHECK: fabd.2s
    140         %tmp1 = load <2 x float>* %A
    141         %tmp2 = load <2 x float>* %B
    142         %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
    143         ret <2 x float> %tmp3
    144 }
    145 
    146 define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
    147 ;CHECK-LABEL: fabd_4s:
    148 ;CHECK: fabd.4s
    149         %tmp1 = load <4 x float>* %A
    150         %tmp2 = load <4 x float>* %B
    151         %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
    152         ret <4 x float> %tmp3
    153 }
    154 
    155 define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
    156 ;CHECK-LABEL: fabd_2d:
    157 ;CHECK: fabd.2d
    158         %tmp1 = load <2 x double>* %A
    159         %tmp2 = load <2 x double>* %B
    160         %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
    161         ret <2 x double> %tmp3
    162 }
    163 
    164 declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone
    165 declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone
    166 declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone
    167 
    168 define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    169 ;CHECK-LABEL: sabd_8b:
    170 ;CHECK: sabd.8b
    171         %tmp1 = load <8 x i8>* %A
    172         %tmp2 = load <8 x i8>* %B
    173         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    174         ret <8 x i8> %tmp3
    175 }
    176 
    177 define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    178 ;CHECK-LABEL: sabd_16b:
    179 ;CHECK: sabd.16b
    180         %tmp1 = load <16 x i8>* %A
    181         %tmp2 = load <16 x i8>* %B
    182         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    183         ret <16 x i8> %tmp3
    184 }
    185 
    186 define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    187 ;CHECK-LABEL: sabd_4h:
    188 ;CHECK: sabd.4h
    189         %tmp1 = load <4 x i16>* %A
    190         %tmp2 = load <4 x i16>* %B
    191         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    192         ret <4 x i16> %tmp3
    193 }
    194 
    195 define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    196 ;CHECK-LABEL: sabd_8h:
    197 ;CHECK: sabd.8h
    198         %tmp1 = load <8 x i16>* %A
    199         %tmp2 = load <8 x i16>* %B
    200         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    201         ret <8 x i16> %tmp3
    202 }
    203 
    204 define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    205 ;CHECK-LABEL: sabd_2s:
    206 ;CHECK: sabd.2s
    207         %tmp1 = load <2 x i32>* %A
    208         %tmp2 = load <2 x i32>* %B
    209         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    210         ret <2 x i32> %tmp3
    211 }
    212 
    213 define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    214 ;CHECK-LABEL: sabd_4s:
    215 ;CHECK: sabd.4s
    216         %tmp1 = load <4 x i32>* %A
    217         %tmp2 = load <4 x i32>* %B
    218         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    219         ret <4 x i32> %tmp3
    220 }
    221 
    222 declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    223 declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    224 declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    225 declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    226 declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    227 declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    228 
    229 define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    230 ;CHECK-LABEL: uabd_8b:
    231 ;CHECK: uabd.8b
    232         %tmp1 = load <8 x i8>* %A
    233         %tmp2 = load <8 x i8>* %B
    234         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    235         ret <8 x i8> %tmp3
    236 }
    237 
    238 define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    239 ;CHECK-LABEL: uabd_16b:
    240 ;CHECK: uabd.16b
    241         %tmp1 = load <16 x i8>* %A
    242         %tmp2 = load <16 x i8>* %B
    243         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    244         ret <16 x i8> %tmp3
    245 }
    246 
    247 define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    248 ;CHECK-LABEL: uabd_4h:
    249 ;CHECK: uabd.4h
    250         %tmp1 = load <4 x i16>* %A
    251         %tmp2 = load <4 x i16>* %B
    252         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    253         ret <4 x i16> %tmp3
    254 }
    255 
    256 define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    257 ;CHECK-LABEL: uabd_8h:
    258 ;CHECK: uabd.8h
    259         %tmp1 = load <8 x i16>* %A
    260         %tmp2 = load <8 x i16>* %B
    261         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    262         ret <8 x i16> %tmp3
    263 }
    264 
    265 define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    266 ;CHECK-LABEL: uabd_2s:
    267 ;CHECK: uabd.2s
    268         %tmp1 = load <2 x i32>* %A
    269         %tmp2 = load <2 x i32>* %B
    270         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    271         ret <2 x i32> %tmp3
    272 }
    273 
    274 define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    275 ;CHECK-LABEL: uabd_4s:
    276 ;CHECK: uabd.4s
    277         %tmp1 = load <4 x i32>* %A
    278         %tmp2 = load <4 x i32>* %B
    279         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    280         ret <4 x i32> %tmp3
    281 }
    282 
    283 declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    284 declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    285 declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    286 declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    287 declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    288 declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    289 
    290 define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
    291 ;CHECK-LABEL: sqabs_8b:
    292 ;CHECK: sqabs.8b
    293         %tmp1 = load <8 x i8>* %A
    294         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1)
    295         ret <8 x i8> %tmp3
    296 }
    297 
    298 define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
    299 ;CHECK-LABEL: sqabs_16b:
    300 ;CHECK: sqabs.16b
    301         %tmp1 = load <16 x i8>* %A
    302         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1)
    303         ret <16 x i8> %tmp3
    304 }
    305 
    306 define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
    307 ;CHECK-LABEL: sqabs_4h:
    308 ;CHECK: sqabs.4h
    309         %tmp1 = load <4 x i16>* %A
    310         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1)
    311         ret <4 x i16> %tmp3
    312 }
    313 
    314 define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
    315 ;CHECK-LABEL: sqabs_8h:
    316 ;CHECK: sqabs.8h
    317         %tmp1 = load <8 x i16>* %A
    318         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1)
    319         ret <8 x i16> %tmp3
    320 }
    321 
    322 define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
    323 ;CHECK-LABEL: sqabs_2s:
    324 ;CHECK: sqabs.2s
    325         %tmp1 = load <2 x i32>* %A
    326         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1)
    327         ret <2 x i32> %tmp3
    328 }
    329 
    330 define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
    331 ;CHECK-LABEL: sqabs_4s:
    332 ;CHECK: sqabs.4s
    333         %tmp1 = load <4 x i32>* %A
    334         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1)
    335         ret <4 x i32> %tmp3
    336 }
    337 
    338 declare <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone
    339 declare <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone
    340 declare <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone
    341 declare <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone
    342 declare <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone
    343 declare <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone
    344 
    345 define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
    346 ;CHECK-LABEL: sqneg_8b:
    347 ;CHECK: sqneg.8b
    348         %tmp1 = load <8 x i8>* %A
    349         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1)
    350         ret <8 x i8> %tmp3
    351 }
    352 
    353 define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
    354 ;CHECK-LABEL: sqneg_16b:
    355 ;CHECK: sqneg.16b
    356         %tmp1 = load <16 x i8>* %A
    357         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1)
    358         ret <16 x i8> %tmp3
    359 }
    360 
    361 define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
    362 ;CHECK-LABEL: sqneg_4h:
    363 ;CHECK: sqneg.4h
    364         %tmp1 = load <4 x i16>* %A
    365         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1)
    366         ret <4 x i16> %tmp3
    367 }
    368 
    369 define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
    370 ;CHECK-LABEL: sqneg_8h:
    371 ;CHECK: sqneg.8h
    372         %tmp1 = load <8 x i16>* %A
    373         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1)
    374         ret <8 x i16> %tmp3
    375 }
    376 
    377 define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
    378 ;CHECK-LABEL: sqneg_2s:
    379 ;CHECK: sqneg.2s
    380         %tmp1 = load <2 x i32>* %A
    381         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1)
    382         ret <2 x i32> %tmp3
    383 }
    384 
    385 define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
    386 ;CHECK-LABEL: sqneg_4s:
    387 ;CHECK: sqneg.4s
    388         %tmp1 = load <4 x i32>* %A
    389         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1)
    390         ret <4 x i32> %tmp3
    391 }
    392 
    393 declare <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone
    394 declare <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone
    395 declare <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone
    396 declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone
    397 declare <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone
    398 declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone
    399 
    400 define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
    401 ;CHECK-LABEL: abs_8b:
    402 ;CHECK: abs.8b
    403         %tmp1 = load <8 x i8>* %A
    404         %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1)
    405         ret <8 x i8> %tmp3
    406 }
    407 
    408 define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
    409 ;CHECK-LABEL: abs_16b:
    410 ;CHECK: abs.16b
    411         %tmp1 = load <16 x i8>* %A
    412         %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1)
    413         ret <16 x i8> %tmp3
    414 }
    415 
    416 define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
    417 ;CHECK-LABEL: abs_4h:
    418 ;CHECK: abs.4h
    419         %tmp1 = load <4 x i16>* %A
    420         %tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1)
    421         ret <4 x i16> %tmp3
    422 }
    423 
    424 define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
    425 ;CHECK-LABEL: abs_8h:
    426 ;CHECK: abs.8h
    427         %tmp1 = load <8 x i16>* %A
    428         %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1)
    429         ret <8 x i16> %tmp3
    430 }
    431 
    432 define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
    433 ;CHECK-LABEL: abs_2s:
    434 ;CHECK: abs.2s
    435         %tmp1 = load <2 x i32>* %A
    436         %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1)
    437         ret <2 x i32> %tmp3
    438 }
    439 
    440 define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
    441 ;CHECK-LABEL: abs_4s:
    442 ;CHECK: abs.4s
    443         %tmp1 = load <4 x i32>* %A
    444         %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1)
    445         ret <4 x i32> %tmp3
    446 }
    447 
    448 define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
    449 ; CHECK-LABEL: abs_1d:
    450 ; CHECK: abs d0, d0
    451   %abs = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %A)
    452   ret <1 x i64> %abs
    453 }
    454 
    455 define i64 @abs_1d_honestly(i64 %A) nounwind {
    456 ; CHECK-LABEL: abs_1d_honestly:
    457 ; CHECK: abs d0, d0
    458   %abs = call i64 @llvm.aarch64.neon.abs.i64(i64 %A)
    459   ret i64 %abs
    460 }
    461 
    462 declare <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8>) nounwind readnone
    463 declare <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8>) nounwind readnone
    464 declare <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16>) nounwind readnone
    465 declare <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16>) nounwind readnone
    466 declare <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32>) nounwind readnone
    467 declare <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32>) nounwind readnone
    468 declare <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64>) nounwind readnone
    469 declare i64 @llvm.aarch64.neon.abs.i64(i64) nounwind readnone
    470 
    471 define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B,  <8 x i16>* %C) nounwind {
    472 ;CHECK-LABEL: sabal8h:
    473 ;CHECK: sabal.8h
    474         %tmp1 = load <8 x i8>* %A
    475         %tmp2 = load <8 x i8>* %B
    476         %tmp3 = load <8 x i16>* %C
    477         %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    478         %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
    479         %tmp5 = add <8 x i16> %tmp3, %tmp4.1
    480         ret <8 x i16> %tmp5
    481 }
    482 
    483 define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
    484 ;CHECK-LABEL: sabal4s:
    485 ;CHECK: sabal.4s
    486         %tmp1 = load <4 x i16>* %A
    487         %tmp2 = load <4 x i16>* %B
    488         %tmp3 = load <4 x i32>* %C
    489         %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    490         %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
    491         %tmp5 = add <4 x i32> %tmp3, %tmp4.1
    492         ret <4 x i32> %tmp5
    493 }
    494 
    495 define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
    496 ;CHECK-LABEL: sabal2d:
    497 ;CHECK: sabal.2d
    498         %tmp1 = load <2 x i32>* %A
    499         %tmp2 = load <2 x i32>* %B
    500         %tmp3 = load <2 x i64>* %C
    501         %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    502         %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
    503         %tmp4.1.1 = zext <2 x i32> %tmp4 to <2 x i64>
    504         %tmp5 = add <2 x i64> %tmp3, %tmp4.1
    505         ret <2 x i64> %tmp5
    506 }
    507 
    508 define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
    509 ;CHECK-LABEL: sabal2_8h:
    510 ;CHECK: sabal2.8h
    511         %load1 = load <16 x i8>* %A
    512         %load2 = load <16 x i8>* %B
    513         %tmp3 = load <8 x i16>* %C
    514         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    515         %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    516         %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    517         %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
    518         %tmp5 = add <8 x i16> %tmp3, %tmp4.1
    519         ret <8 x i16> %tmp5
    520 }
    521 
    522 define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
    523 ;CHECK-LABEL: sabal2_4s:
    524 ;CHECK: sabal2.4s
    525         %load1 = load <8 x i16>* %A
    526         %load2 = load <8 x i16>* %B
    527         %tmp3 = load <4 x i32>* %C
    528         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    529         %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    530         %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    531         %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
    532         %tmp5 = add <4 x i32> %tmp3, %tmp4.1
    533         ret <4 x i32> %tmp5
    534 }
    535 
    536 define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
    537 ;CHECK-LABEL: sabal2_2d:
    538 ;CHECK: sabal2.2d
    539         %load1 = load <4 x i32>* %A
    540         %load2 = load <4 x i32>* %B
    541         %tmp3 = load <2 x i64>* %C
    542         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    543         %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    544         %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    545         %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
    546         %tmp5 = add <2 x i64> %tmp3, %tmp4.1
    547         ret <2 x i64> %tmp5
    548 }
    549 
    550 define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B,  <8 x i16>* %C) nounwind {
    551 ;CHECK-LABEL: uabal8h:
    552 ;CHECK: uabal.8h
    553         %tmp1 = load <8 x i8>* %A
    554         %tmp2 = load <8 x i8>* %B
    555         %tmp3 = load <8 x i16>* %C
    556         %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    557         %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
    558         %tmp5 = add <8 x i16> %tmp3, %tmp4.1
    559         ret <8 x i16> %tmp5
    560 }
    561 
    562 define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
    563 ;CHECK-LABEL: uabal4s:
    564 ;CHECK: uabal.4s
    565         %tmp1 = load <4 x i16>* %A
    566         %tmp2 = load <4 x i16>* %B
    567         %tmp3 = load <4 x i32>* %C
    568         %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    569         %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
    570         %tmp5 = add <4 x i32> %tmp3, %tmp4.1
    571         ret <4 x i32> %tmp5
    572 }
    573 
    574 define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
    575 ;CHECK-LABEL: uabal2d:
    576 ;CHECK: uabal.2d
    577         %tmp1 = load <2 x i32>* %A
    578         %tmp2 = load <2 x i32>* %B
    579         %tmp3 = load <2 x i64>* %C
    580         %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    581         %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
    582         %tmp5 = add <2 x i64> %tmp3, %tmp4.1
    583         ret <2 x i64> %tmp5
    584 }
    585 
    586 define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
    587 ;CHECK-LABEL: uabal2_8h:
    588 ;CHECK: uabal2.8h
    589         %load1 = load <16 x i8>* %A
    590         %load2 = load <16 x i8>* %B
    591         %tmp3 = load <8 x i16>* %C
    592         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    593         %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    594         %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    595         %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
    596         %tmp5 = add <8 x i16> %tmp3, %tmp4.1
    597         ret <8 x i16> %tmp5
    598 }
    599 
    600 define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
    601 ;CHECK-LABEL: uabal2_4s:
    602 ;CHECK: uabal2.4s
    603         %load1 = load <8 x i16>* %A
    604         %load2 = load <8 x i16>* %B
    605         %tmp3 = load <4 x i32>* %C
    606         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    607         %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    608         %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    609         %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
    610         %tmp5 = add <4 x i32> %tmp3, %tmp4.1
    611         ret <4 x i32> %tmp5
    612 }
    613 
    614 define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
    615 ;CHECK-LABEL: uabal2_2d:
    616 ;CHECK: uabal2.2d
    617         %load1 = load <4 x i32>* %A
    618         %load2 = load <4 x i32>* %B
    619         %tmp3 = load <2 x i64>* %C
    620         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    621         %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    622         %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    623         %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
    624         %tmp5 = add <2 x i64> %tmp3, %tmp4.1
    625         ret <2 x i64> %tmp5
    626 }
    627 
    628 define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
    629 ;CHECK-LABEL: saba_8b:
    630 ;CHECK: saba.8b
    631         %tmp1 = load <8 x i8>* %A
    632         %tmp2 = load <8 x i8>* %B
    633         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    634         %tmp4 = load <8 x i8>* %C
    635         %tmp5 = add <8 x i8> %tmp3, %tmp4
    636         ret <8 x i8> %tmp5
    637 }
    638 
    639 define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
    640 ;CHECK-LABEL: saba_16b:
    641 ;CHECK: saba.16b
    642         %tmp1 = load <16 x i8>* %A
    643         %tmp2 = load <16 x i8>* %B
    644         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    645         %tmp4 = load <16 x i8>* %C
    646         %tmp5 = add <16 x i8> %tmp3, %tmp4
    647         ret <16 x i8> %tmp5
    648 }
    649 
    650 define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
    651 ;CHECK-LABEL: saba_4h:
    652 ;CHECK: saba.4h
    653         %tmp1 = load <4 x i16>* %A
    654         %tmp2 = load <4 x i16>* %B
    655         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    656         %tmp4 = load <4 x i16>* %C
    657         %tmp5 = add <4 x i16> %tmp3, %tmp4
    658         ret <4 x i16> %tmp5
    659 }
    660 
    661 define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
    662 ;CHECK-LABEL: saba_8h:
    663 ;CHECK: saba.8h
    664         %tmp1 = load <8 x i16>* %A
    665         %tmp2 = load <8 x i16>* %B
    666         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    667         %tmp4 = load <8 x i16>* %C
    668         %tmp5 = add <8 x i16> %tmp3, %tmp4
    669         ret <8 x i16> %tmp5
    670 }
    671 
    672 define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
    673 ;CHECK-LABEL: saba_2s:
    674 ;CHECK: saba.2s
    675         %tmp1 = load <2 x i32>* %A
    676         %tmp2 = load <2 x i32>* %B
    677         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    678         %tmp4 = load <2 x i32>* %C
    679         %tmp5 = add <2 x i32> %tmp3, %tmp4
    680         ret <2 x i32> %tmp5
    681 }
    682 
    ; SABA: sabd.v4i32 + add of a third operand must fold into saba.4s.
    683 define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
    684 ;CHECK-LABEL: saba_4s:
    685 ;CHECK: saba.4s
    686         %tmp1 = load <4 x i32>* %A
    687         %tmp2 = load <4 x i32>* %B
    688         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    689         %tmp4 = load <4 x i32>* %C
    690         %tmp5 = add <4 x i32> %tmp3, %tmp4
    691         ret <4 x i32> %tmp5
    692 }
    693 
    ; UABA: unsigned absolute-difference (uabd intrinsic) followed by an add of
    ; a third operand must fold into a single uaba.8b accumulate instruction.
    694 define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
    695 ;CHECK-LABEL: uaba_8b:
    696 ;CHECK: uaba.8b
    697         %tmp1 = load <8 x i8>* %A
    698         %tmp2 = load <8 x i8>* %B
    699         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    700         %tmp4 = load <8 x i8>* %C
    701         %tmp5 = add <8 x i8> %tmp3, %tmp4
    702         ret <8 x i8> %tmp5
    703 }
    704 
    ; UABA: uabd.v16i8 + add of a third operand must fold into uaba.16b.
    705 define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
    706 ;CHECK-LABEL: uaba_16b:
    707 ;CHECK: uaba.16b
    708         %tmp1 = load <16 x i8>* %A
    709         %tmp2 = load <16 x i8>* %B
    710         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    711         %tmp4 = load <16 x i8>* %C
    712         %tmp5 = add <16 x i8> %tmp3, %tmp4
    713         ret <16 x i8> %tmp5
    714 }
    715 
    ; UABA: uabd.v4i16 + add of a third operand must fold into uaba.4h.
    716 define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
    717 ;CHECK-LABEL: uaba_4h:
    718 ;CHECK: uaba.4h
    719         %tmp1 = load <4 x i16>* %A
    720         %tmp2 = load <4 x i16>* %B
    721         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    722         %tmp4 = load <4 x i16>* %C
    723         %tmp5 = add <4 x i16> %tmp3, %tmp4
    724         ret <4 x i16> %tmp5
    725 }
    726 
    ; UABA: uabd.v8i16 + add of a third operand must fold into uaba.8h.
    727 define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
    728 ;CHECK-LABEL: uaba_8h:
    729 ;CHECK: uaba.8h
    730         %tmp1 = load <8 x i16>* %A
    731         %tmp2 = load <8 x i16>* %B
    732         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    733         %tmp4 = load <8 x i16>* %C
    734         %tmp5 = add <8 x i16> %tmp3, %tmp4
    735         ret <8 x i16> %tmp5
    736 }
    737 
    ; UABA: uabd.v2i32 + add of a third operand must fold into uaba.2s.
    738 define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
    739 ;CHECK-LABEL: uaba_2s:
    740 ;CHECK: uaba.2s
    741         %tmp1 = load <2 x i32>* %A
    742         %tmp2 = load <2 x i32>* %B
    743         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    744         %tmp4 = load <2 x i32>* %C
    745         %tmp5 = add <2 x i32> %tmp3, %tmp4
    746         ret <2 x i32> %tmp5
    747 }
    748 
    ; UABA: uabd.v4i32 + add of a third operand must fold into uaba.4s.
    749 define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
    750 ;CHECK-LABEL: uaba_4s:
    751 ;CHECK: uaba.4s
    752         %tmp1 = load <4 x i32>* %A
    753         %tmp2 = load <4 x i32>* %B
    754         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    755         %tmp4 = load <4 x i32>* %C
    756         %tmp5 = add <4 x i32> %tmp3, %tmp4
    757         ret <4 x i32> %tmp5
    758 }
    759 
    760 ; Scalar FABD
    ; The scalar f32 fabd intrinsic must lower to a single fabd on s-registers.
    761 define float @fabds(float %a, float %b) nounwind {
    762 ; CHECK-LABEL: fabds:
    763 ; CHECK: fabd s0, s0, s1
    764   %vabd.i = tail call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) nounwind
    765   ret float %vabd.i
    766 }
    767 
    ; The scalar f64 fabd intrinsic must lower to a single fabd on d-registers.
    768 define double @fabdd(double %a, double %b) nounwind {
    769 ; CHECK-LABEL: fabdd:
    770 ; CHECK: fabd d0, d0, d1
    771   %vabd.i = tail call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) nounwind
    772   ret double %vabd.i
    773 }
    774 
    775 declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone
    776 declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone
    777 
    ; uabd on the high half of %lhs (lanes 2,3) against a duplicated scalar,
    ; widened with zext, must select uabdl2.2d directly; no ext.16b should be
    ; emitted to materialize the high half in the low lanes.
    778 define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
    779 ; CHECK-LABEL: uabdl_from_extract_dup:
    780 ; CHECK-NOT: ext.16b
    781 ; CHECK: uabdl2.2d
    782   %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
    783   %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
    784 
    785   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    786 
    787   %res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
    788   %res1 = zext <2 x i32> %res to <2 x i64>
    789   ret <2 x i64> %res1
    790 }
    791 
    ; Signed counterpart of uabdl_from_extract_dup: sabd on the high half of
    ; %lhs (lanes 2,3) against a duplicated scalar, widened with zext, must
    ; select sabdl2.2d directly with no ext.16b.
    792 define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
    793 ; CHECK-LABEL: sabdl_from_extract_dup:
    794 ; CHECK-NOT: ext.16b
    795 ; CHECK: sabdl2.2d
    796   %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
    797   %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
    798 
    799   %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
    800 
    801   %res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
    802   %res1 = zext <2 x i32> %res to <2 x i64>
    803   ret <2 x i64> %res1
    804 }
    805