; (code-browser navigation residue, not part of the test) Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
      2 
      3 define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; sqshl intrinsic on <8 x i8> vectors loaded from %A and %B
      4 ;CHECK-LABEL: sqshl8b:
      5 ;CHECK: sqshl.8b
      6   %lhs = load <8 x i8>* %A
      7   %rhs = load <8 x i8>* %B
      8   %res = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
      9   ret <8 x i8> %res
     10 }
     11 
     12 define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; sqshl intrinsic on <4 x i16> vectors loaded from %A and %B
     13 ;CHECK-LABEL: sqshl4h:
     14 ;CHECK: sqshl.4h
     15   %lhs = load <4 x i16>* %A
     16   %rhs = load <4 x i16>* %B
     17   %res = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
     18   ret <4 x i16> %res
     19 }
     20 
     21 define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; sqshl intrinsic on <2 x i32> vectors loaded from %A and %B
     22 ;CHECK-LABEL: sqshl2s:
     23 ;CHECK: sqshl.2s
     24   %lhs = load <2 x i32>* %A
     25   %rhs = load <2 x i32>* %B
     26   %res = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
     27   ret <2 x i32> %res
     28 }
     29 
     30 define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; uqshl intrinsic on <8 x i8> vectors loaded from %A and %B
     31 ;CHECK-LABEL: uqshl8b:
     32 ;CHECK: uqshl.8b
     33   %lhs = load <8 x i8>* %A
     34   %rhs = load <8 x i8>* %B
     35   %res = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
     36   ret <8 x i8> %res
     37 }
     38 
     39 define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; uqshl intrinsic on <4 x i16> vectors loaded from %A and %B
     40 ;CHECK-LABEL: uqshl4h:
     41 ;CHECK: uqshl.4h
     42   %lhs = load <4 x i16>* %A
     43   %rhs = load <4 x i16>* %B
     44   %res = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
     45   ret <4 x i16> %res
     46 }
     47 
     48 define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; uqshl intrinsic on <2 x i32> vectors loaded from %A and %B
     49 ;CHECK-LABEL: uqshl2s:
     50 ;CHECK: uqshl.2s
     51   %lhs = load <2 x i32>* %A
     52   %rhs = load <2 x i32>* %B
     53   %res = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
     54   ret <2 x i32> %res
     55 }
     56 
     57 define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; sqshl intrinsic on <16 x i8> vectors loaded from %A and %B
     58 ;CHECK-LABEL: sqshl16b:
     59 ;CHECK: sqshl.16b
     60   %lhs = load <16 x i8>* %A
     61   %rhs = load <16 x i8>* %B
     62   %res = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
     63   ret <16 x i8> %res
     64 }
     65 
     66 define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; sqshl intrinsic on <8 x i16> vectors loaded from %A and %B
     67 ;CHECK-LABEL: sqshl8h:
     68 ;CHECK: sqshl.8h
     69   %lhs = load <8 x i16>* %A
     70   %rhs = load <8 x i16>* %B
     71   %res = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
     72   ret <8 x i16> %res
     73 }
     74 
     75 define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; sqshl intrinsic on <4 x i32> vectors loaded from %A and %B
     76 ;CHECK-LABEL: sqshl4s:
     77 ;CHECK: sqshl.4s
     78   %lhs = load <4 x i32>* %A
     79   %rhs = load <4 x i32>* %B
     80   %res = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
     81   ret <4 x i32> %res
     82 }
     83 
     84 define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; sqshl intrinsic on <2 x i64> vectors loaded from %A and %B
     85 ;CHECK-LABEL: sqshl2d:
     86 ;CHECK: sqshl.2d
     87   %lhs = load <2 x i64>* %A
     88   %rhs = load <2 x i64>* %B
     89   %res = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
     90   ret <2 x i64> %res
     91 }
     92 
     93 define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; uqshl intrinsic on <16 x i8> vectors loaded from %A and %B
     94 ;CHECK-LABEL: uqshl16b:
     95 ;CHECK: uqshl.16b
     96   %lhs = load <16 x i8>* %A
     97   %rhs = load <16 x i8>* %B
     98   %res = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
     99   ret <16 x i8> %res
    100 }
    101 
    102 define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; uqshl intrinsic on <8 x i16> vectors loaded from %A and %B
    103 ;CHECK-LABEL: uqshl8h:
    104 ;CHECK: uqshl.8h
    105   %lhs = load <8 x i16>* %A
    106   %rhs = load <8 x i16>* %B
    107   %res = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    108   ret <8 x i16> %res
    109 }
    110 
    111 define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; uqshl intrinsic on <4 x i32> vectors loaded from %A and %B
    112 ;CHECK-LABEL: uqshl4s:
    113 ;CHECK: uqshl.4s
    114   %lhs = load <4 x i32>* %A
    115   %rhs = load <4 x i32>* %B
    116   %res = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    117   ret <4 x i32> %res
    118 }
    119 
    120 define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; uqshl intrinsic on <2 x i64> vectors loaded from %A and %B
    121 ;CHECK-LABEL: uqshl2d:
    122 ;CHECK: uqshl.2d
    123   %lhs = load <2 x i64>* %A
    124   %rhs = load <2 x i64>* %B
    125   %res = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
    126   ret <2 x i64> %res
    127 }
    128 
    129 declare <8 x i8>  @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @sqshl8b
    130 declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @sqshl4h
    131 declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @sqshl2s
    132 declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone ; no call site in the visible part of this file
    133 
    134 declare <8 x i8>  @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @uqshl8b
    135 declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @uqshl4h
    136 declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @uqshl2s
    137 declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone ; no call site in the visible part of this file
    138 
    139 declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @sqshl16b
    140 declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @sqshl8h
    141 declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @sqshl4s
    142 declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone ; called by @sqshl2d
    143 
    144 declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @uqshl16b
    145 declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @uqshl8h
    146 declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @uqshl4s
    147 declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone ; called by @uqshl2d
    148 
    149 define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; srshl intrinsic on <8 x i8> vectors loaded from %A and %B
    150 ;CHECK-LABEL: srshl8b:
    151 ;CHECK: srshl.8b
    152   %lhs = load <8 x i8>* %A
    153   %rhs = load <8 x i8>* %B
    154   %res = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
    155   ret <8 x i8> %res
    156 }
    157 
    158 define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; srshl intrinsic on <4 x i16> vectors loaded from %A and %B
    159 ;CHECK-LABEL: srshl4h:
    160 ;CHECK: srshl.4h
    161   %lhs = load <4 x i16>* %A
    162   %rhs = load <4 x i16>* %B
    163   %res = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    164   ret <4 x i16> %res
    165 }
    166 
    167 define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; srshl intrinsic on <2 x i32> vectors loaded from %A and %B
    168 ;CHECK-LABEL: srshl2s:
    169 ;CHECK: srshl.2s
    170   %lhs = load <2 x i32>* %A
    171   %rhs = load <2 x i32>* %B
    172   %res = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    173   ret <2 x i32> %res
    174 }
    175 
    176 define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; urshl intrinsic on <8 x i8> vectors loaded from %A and %B
    177 ;CHECK-LABEL: urshl8b:
    178 ;CHECK: urshl.8b
    179   %lhs = load <8 x i8>* %A
    180   %rhs = load <8 x i8>* %B
    181   %res = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
    182   ret <8 x i8> %res
    183 }
    184 
    185 define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; urshl intrinsic on <4 x i16> vectors loaded from %A and %B
    186 ;CHECK-LABEL: urshl4h:
    187 ;CHECK: urshl.4h
    188   %lhs = load <4 x i16>* %A
    189   %rhs = load <4 x i16>* %B
    190   %res = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    191   ret <4 x i16> %res
    192 }
    193 
    194 define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; urshl intrinsic on <2 x i32> vectors loaded from %A and %B
    195 ;CHECK-LABEL: urshl2s:
    196 ;CHECK: urshl.2s
    197   %lhs = load <2 x i32>* %A
    198   %rhs = load <2 x i32>* %B
    199   %res = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    200   ret <2 x i32> %res
    201 }
    202 
    203 define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; srshl intrinsic on <16 x i8> vectors loaded from %A and %B
    204 ;CHECK-LABEL: srshl16b:
    205 ;CHECK: srshl.16b
    206   %lhs = load <16 x i8>* %A
    207   %rhs = load <16 x i8>* %B
    208   %res = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
    209   ret <16 x i8> %res
    210 }
    211 
    212 define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; srshl intrinsic on <8 x i16> vectors loaded from %A and %B
    213 ;CHECK-LABEL: srshl8h:
    214 ;CHECK: srshl.8h
    215   %lhs = load <8 x i16>* %A
    216   %rhs = load <8 x i16>* %B
    217   %res = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    218   ret <8 x i16> %res
    219 }
    220 
    221 define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; srshl intrinsic on <4 x i32> vectors loaded from %A and %B
    222 ;CHECK-LABEL: srshl4s:
    223 ;CHECK: srshl.4s
    224   %lhs = load <4 x i32>* %A
    225   %rhs = load <4 x i32>* %B
    226   %res = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    227   ret <4 x i32> %res
    228 }
    229 
    230 define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; srshl intrinsic on <2 x i64> vectors loaded from %A and %B
    231 ;CHECK-LABEL: srshl2d:
    232 ;CHECK: srshl.2d
    233   %lhs = load <2 x i64>* %A
    234   %rhs = load <2 x i64>* %B
    235   %res = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
    236   ret <2 x i64> %res
    237 }
    238 
    239 define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; urshl intrinsic on <16 x i8> vectors loaded from %A and %B
    240 ;CHECK-LABEL: urshl16b:
    241 ;CHECK: urshl.16b
    242   %lhs = load <16 x i8>* %A
    243   %rhs = load <16 x i8>* %B
    244   %res = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
    245   ret <16 x i8> %res
    246 }
    247 
    248 define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; urshl intrinsic on <8 x i16> vectors loaded from %A and %B
    249 ;CHECK-LABEL: urshl8h:
    250 ;CHECK: urshl.8h
    251   %lhs = load <8 x i16>* %A
    252   %rhs = load <8 x i16>* %B
    253   %res = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    254   ret <8 x i16> %res
    255 }
    256 
    257 define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; urshl intrinsic on <4 x i32> vectors loaded from %A and %B
    258 ;CHECK-LABEL: urshl4s:
    259 ;CHECK: urshl.4s
    260   %lhs = load <4 x i32>* %A
    261   %rhs = load <4 x i32>* %B
    262   %res = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    263   ret <4 x i32> %res
    264 }
    265 
    266 define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; urshl intrinsic on <2 x i64> vectors loaded from %A and %B
    267 ;CHECK-LABEL: urshl2d:
    268 ;CHECK: urshl.2d
    269   %lhs = load <2 x i64>* %A
    270   %rhs = load <2 x i64>* %B
    271   %res = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
    272   ret <2 x i64> %res
    273 }
    274 
    275 declare <8 x i8>  @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @srshl8b and @srshr8b
    276 declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @srshl4h and @srshr4h
    277 declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @srshl2s and @srshr2s
    278 declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone ; no call site in the visible part of this file
    279 
    280 declare <8 x i8>  @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @urshl8b and @urshr8b
    281 declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @urshl4h and @urshr4h
    282 declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @urshl2s and @urshr2s
    283 declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone ; no call site in the visible part of this file
    284 
    285 declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @srshl16b and @srshr16b
    286 declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @srshl8h and @srshr8h
    287 declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @srshl4s and @srshr4s
    288 declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone ; called by @srshl2d and @srshr2d
    289 
    290 declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @urshl16b and @urshr16b
    291 declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @urshl8h and @urshr8h
    292 declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @urshl4s and @urshr4s
    293 declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone ; called by @urshl2d and @urshr2d
    294 
    295 define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; sqrshl intrinsic on <8 x i8> vectors loaded from %A and %B
    296 ;CHECK-LABEL: sqrshl8b:
    297 ;CHECK: sqrshl.8b
    298   %lhs = load <8 x i8>* %A
    299   %rhs = load <8 x i8>* %B
    300   %res = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
    301   ret <8 x i8> %res
    302 }
    303 
    304 define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; sqrshl intrinsic on <4 x i16> vectors loaded from %A and %B
    305 ;CHECK-LABEL: sqrshl4h:
    306 ;CHECK: sqrshl.4h
    307   %lhs = load <4 x i16>* %A
    308   %rhs = load <4 x i16>* %B
    309   %res = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    310   ret <4 x i16> %res
    311 }
    312 
    313 define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; sqrshl intrinsic on <2 x i32> vectors loaded from %A and %B
    314 ;CHECK-LABEL: sqrshl2s:
    315 ;CHECK: sqrshl.2s
    316   %lhs = load <2 x i32>* %A
    317   %rhs = load <2 x i32>* %B
    318   %res = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    319   ret <2 x i32> %res
    320 }
    321 
    322 define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; uqrshl intrinsic on <8 x i8> vectors loaded from %A and %B
    323 ;CHECK-LABEL: uqrshl8b:
    324 ;CHECK: uqrshl.8b
    325   %lhs = load <8 x i8>* %A
    326   %rhs = load <8 x i8>* %B
    327   %res = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
    328   ret <8 x i8> %res
    329 }
    330 
    331 define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; uqrshl intrinsic on <4 x i16> vectors loaded from %A and %B
    332 ;CHECK-LABEL: uqrshl4h:
    333 ;CHECK: uqrshl.4h
    334   %lhs = load <4 x i16>* %A
    335   %rhs = load <4 x i16>* %B
    336   %res = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    337   ret <4 x i16> %res
    338 }
    339 
    340 define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; uqrshl intrinsic on <2 x i32> vectors loaded from %A and %B
    341 ;CHECK-LABEL: uqrshl2s:
    342 ;CHECK: uqrshl.2s
    343   %lhs = load <2 x i32>* %A
    344   %rhs = load <2 x i32>* %B
    345   %res = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    346   ret <2 x i32> %res
    347 }
    348 
    349 define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; sqrshl intrinsic on <16 x i8> vectors loaded from %A and %B
    350 ;CHECK-LABEL: sqrshl16b:
    351 ;CHECK: sqrshl.16b
    352   %lhs = load <16 x i8>* %A
    353   %rhs = load <16 x i8>* %B
    354   %res = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
    355   ret <16 x i8> %res
    356 }
    357 
    358 define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; sqrshl intrinsic on <8 x i16> vectors loaded from %A and %B
    359 ;CHECK-LABEL: sqrshl8h:
    360 ;CHECK: sqrshl.8h
    361   %lhs = load <8 x i16>* %A
    362   %rhs = load <8 x i16>* %B
    363   %res = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    364   ret <8 x i16> %res
    365 }
    366 
    367 define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; sqrshl intrinsic on <4 x i32> vectors loaded from %A and %B
    368 ;CHECK-LABEL: sqrshl4s:
    369 ;CHECK: sqrshl.4s
    370   %lhs = load <4 x i32>* %A
    371   %rhs = load <4 x i32>* %B
    372   %res = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    373   ret <4 x i32> %res
    374 }
    375 
    376 define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; sqrshl intrinsic on <2 x i64> vectors loaded from %A and %B
    377 ;CHECK-LABEL: sqrshl2d:
    378 ;CHECK: sqrshl.2d
    379   %lhs = load <2 x i64>* %A
    380   %rhs = load <2 x i64>* %B
    381   %res = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
    382   ret <2 x i64> %res
    383 }
    384 
    385 define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; uqrshl intrinsic on <16 x i8> vectors loaded from %A and %B
    386 ;CHECK-LABEL: uqrshl16b:
    387 ;CHECK: uqrshl.16b
    388   %lhs = load <16 x i8>* %A
    389   %rhs = load <16 x i8>* %B
    390   %res = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
    391   ret <16 x i8> %res
    392 }
    393 
    394 define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; uqrshl intrinsic on <8 x i16> vectors loaded from %A and %B
    395 ;CHECK-LABEL: uqrshl8h:
    396 ;CHECK: uqrshl.8h
    397   %lhs = load <8 x i16>* %A
    398   %rhs = load <8 x i16>* %B
    399   %res = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    400   ret <8 x i16> %res
    401 }
    402 
    403 define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; uqrshl intrinsic on <4 x i32> vectors loaded from %A and %B
    404 ;CHECK-LABEL: uqrshl4s:
    405 ;CHECK: uqrshl.4s
    406   %lhs = load <4 x i32>* %A
    407   %rhs = load <4 x i32>* %B
    408   %res = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    409   ret <4 x i32> %res
    410 }
    411 
    412 define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; uqrshl intrinsic on <2 x i64> vectors loaded from %A and %B
    413 ;CHECK-LABEL: uqrshl2d:
    414 ;CHECK: uqrshl.2d
    415   %lhs = load <2 x i64>* %A
    416   %rhs = load <2 x i64>* %B
    417   %res = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
    418   ret <2 x i64> %res
    419 }
    420 
    421 declare <8 x i8>  @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @sqrshl8b
    422 declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @sqrshl4h
    423 declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @sqrshl2s
    424 declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone ; no call site in the visible part of this file
    425 
    426 declare <8 x i8>  @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @uqrshl8b
    427 declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @uqrshl4h
    428 declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @uqrshl2s
    429 declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone ; no call site in the visible part of this file
    430 
    431 declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @sqrshl16b
    432 declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @sqrshl8h
    433 declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @sqrshl4s
    434 declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone ; called by @sqrshl2d
    435 
    436 declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @uqrshl16b
    437 declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @uqrshl8h
    438 declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @uqrshl4s
    439 declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone ; called by @uqrshl2d
    440 
    441 define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind { ; urshl intrinsic with an all -1 constant second operand; output must contain urshr.8b
    442 ;CHECK-LABEL: urshr8b:
    443 ;CHECK: urshr.8b
    444   %val = load <8 x i8>* %A
    445   %res = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %val, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
    446   ret <8 x i8> %res
    447 }
    448 
    449 define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind { ; urshl intrinsic with an all -1 constant second operand; output must contain urshr.4h
    450 ;CHECK-LABEL: urshr4h:
    451 ;CHECK: urshr.4h
    452   %val = load <4 x i16>* %A
    453   %res = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %val, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
    454   ret <4 x i16> %res
    455 }
    456 
    457 define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind { ; urshl intrinsic with an all -1 constant second operand; output must contain urshr.2s
    458 ;CHECK-LABEL: urshr2s:
    459 ;CHECK: urshr.2s
    460   %val = load <2 x i32>* %A
    461   %res = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %val, <2 x i32> <i32 -1, i32 -1>)
    462   ret <2 x i32> %res
    463 }
    464 
    465 define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind { ; urshl intrinsic with an all -1 constant second operand; output must contain urshr.16b
    466 ;CHECK-LABEL: urshr16b:
    467 ;CHECK: urshr.16b
    468   %val = load <16 x i8>* %A
    469   %res = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %val, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
    470   ret <16 x i8> %res
    471 }
    472 
    473 define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind { ; urshl intrinsic with an all -1 constant second operand; output must contain urshr.8h
    474 ;CHECK-LABEL: urshr8h:
    475 ;CHECK: urshr.8h
    476   %val = load <8 x i16>* %A
    477   %res = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %val, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
    478   ret <8 x i16> %res
    479 }
    480 
    481 define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind { ; urshl intrinsic with an all -1 constant second operand; output must contain urshr.4s
    482 ;CHECK-LABEL: urshr4s:
    483 ;CHECK: urshr.4s
    484   %val = load <4 x i32>* %A
    485   %res = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %val, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
    486   ret <4 x i32> %res
    487 }
    488 
    489 define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind { ; urshl intrinsic with an all -1 constant second operand; output must contain urshr.2d
    490 ;CHECK-LABEL: urshr2d:
    491 ;CHECK: urshr.2d
    492   %val = load <2 x i64>* %A
    493   %res = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %val, <2 x i64> <i64 -1, i64 -1>)
    494   ret <2 x i64> %res
    495 }
    496 
    497 define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind { ; srshl intrinsic with an all -1 constant second operand; output must contain srshr.8b
    498 ;CHECK-LABEL: srshr8b:
    499 ;CHECK: srshr.8b
    500   %val = load <8 x i8>* %A
    501   %res = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %val, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
    502   ret <8 x i8> %res
    503 }
    504 
    505 define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind { ; srshl intrinsic with an all -1 constant second operand; output must contain srshr.4h
    506 ;CHECK-LABEL: srshr4h:
    507 ;CHECK: srshr.4h
    508   %val = load <4 x i16>* %A
    509   %res = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %val, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
    510   ret <4 x i16> %res
    511 }
    512 
    513 define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind { ; srshl intrinsic with an all -1 constant second operand; output must contain srshr.2s
    514 ;CHECK-LABEL: srshr2s:
    515 ;CHECK: srshr.2s
    516   %val = load <2 x i32>* %A
    517   %res = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %val, <2 x i32> <i32 -1, i32 -1>)
    518   ret <2 x i32> %res
    519 }
    520 
    521 define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind { ; srshl intrinsic with an all -1 constant second operand; output must contain srshr.16b
    522 ;CHECK-LABEL: srshr16b:
    523 ;CHECK: srshr.16b
    524   %val = load <16 x i8>* %A
    525   %res = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %val, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
    526   ret <16 x i8> %res
    527 }
    528 
    529 define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind { ; srshl intrinsic with an all -1 constant second operand; output must contain srshr.8h
    530 ;CHECK-LABEL: srshr8h:
    531 ;CHECK: srshr.8h
    532   %val = load <8 x i16>* %A
    533   %res = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %val, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
    534   ret <8 x i16> %res
    535 }
    536 
    537 define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind { ; srshl intrinsic with an all -1 constant second operand; output must contain srshr.4s
    538 ;CHECK-LABEL: srshr4s:
    539 ;CHECK: srshr.4s
    540   %val = load <4 x i32>* %A
    541   %res = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %val, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
    542   ret <4 x i32> %res
    543 }
    544 
    545 define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind { ; srshl intrinsic with an all -1 constant second operand; output must contain srshr.2d
    546 ;CHECK-LABEL: srshr2d:
    547 ;CHECK: srshr.2d
    548   %val = load <2 x i64>* %A
    549   %res = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %val, <2 x i64> <i64 -1, i64 -1>)
    550   ret <2 x i64> %res
    551 }
    552 
    553 define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind { ; sqshlu intrinsic with a constant splat of 1; output must use the #1 immediate form
    554 ;CHECK-LABEL: sqshlu8b:
    555 ;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
    556   %val = load <8 x i8>* %A
    557   %res = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %val, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
    558   ret <8 x i8> %res
    559 }
    560 
    561 define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind { ; sqshlu intrinsic with a constant splat of 1; output must use the #1 immediate form
    562 ;CHECK-LABEL: sqshlu4h:
    563 ;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
    564   %val = load <4 x i16>* %A
    565   %res = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %val, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
    566   ret <4 x i16> %res
    567 }
    568 
    569 define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind { ; sqshlu intrinsic with a constant splat of 1; output must use the #1 immediate form
    570 ;CHECK-LABEL: sqshlu2s:
    571 ;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
    572   %val = load <2 x i32>* %A
    573   %res = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %val, <2 x i32> <i32 1, i32 1>)
    574   ret <2 x i32> %res
    575 }
    576 
    577 define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind { ; sqshlu intrinsic with a constant splat of 1; output must use the #1 immediate form
    578 ;CHECK-LABEL: sqshlu16b:
    579 ;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
    580   %val = load <16 x i8>* %A
    581   %res = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %val, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
    582   ret <16 x i8> %res
    583 }
    584 
    585 define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind { ; sqshlu intrinsic with a constant splat of 1; output must use the #1 immediate form
    586 ;CHECK-LABEL: sqshlu8h:
    587 ;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
    588   %val = load <8 x i16>* %A
    589   %res = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %val, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
    590   ret <8 x i16> %res
    591 }
    592 
    593 define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind { ; sqshlu intrinsic with a constant splat of 1; output must use the #1 immediate form
    594 ;CHECK-LABEL: sqshlu4s:
    595 ;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
    596   %val = load <4 x i32>* %A
    597   %res = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %val, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
    598   ret <4 x i32> %res
    599 }
    600 
    601 define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind { ; sqshlu intrinsic with a constant splat of 1; output must use the #1 immediate form
    602 ;CHECK-LABEL: sqshlu2d:
    603 ;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
    604   %val = load <2 x i64>* %A
    605   %res = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %val, <2 x i64> <i64 1, i64 1>)
    606   ret <2 x i64> %res
    607 }
    608 
    609 declare <8 x i8>  @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @sqshlu8b
    610 declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @sqshlu4h
    611 declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @sqshlu2s
    612 declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone ; no call site in the visible part of this file
    613 
    614 declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @sqshlu16b
    615 declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @sqshlu8h
    616 declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @sqshlu4s
    617 declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone ; called by @sqshlu2d
    618 
    619 define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind { ; rshrn intrinsic with i32 1 on a loaded <8 x i16>; output must contain rshrn.8b ... #1
    620 ;CHECK-LABEL: rshrn8b:
    621 ;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
    622   %wide = load <8 x i16>* %A
    623   %narrow = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %wide, i32 1)
    624   ret <8 x i8> %narrow
    625 }
    626 
    627 define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind { ; rshrn intrinsic with i32 1 on a loaded <4 x i32>; output must contain rshrn.4h ... #1
    628 ;CHECK-LABEL: rshrn4h:
    629 ;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
    630   %wide = load <4 x i32>* %A
    631   %narrow = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %wide, i32 1)
    632   ret <4 x i16> %narrow
    633 }
    634 
    635 define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind { ; rshrn intrinsic with i32 1 on a loaded <2 x i64>; output must contain rshrn.2s ... #1
    636 ;CHECK-LABEL: rshrn2s:
    637 ;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
    638   %wide = load <2 x i64>* %A
    639   %narrow = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %wide, i32 1)
    640   ret <2 x i32> %narrow
    641 }
    642 
    643 define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind { ; rshrn result concatenated after the vector loaded from %ret; output must contain rshrn2.16b
    644 ;CHECK-LABEL: rshrn16b:
    645 ;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
    646   %low = load <8 x i8>* %ret
    647   %wide = load <8 x i16>* %A
    648   %narrow = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %wide, i32 1)
    649   %joined = shufflevector <8 x i8> %low, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    650   ret <16 x i8> %joined
    651 }
    652 
    653 define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; rshrn result concatenated after the vector loaded from %ret; output must contain rshrn2.8h
    654 ;CHECK-LABEL: rshrn8h:
    655 ;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
    656   %low = load <4 x i16>* %ret
    657   %wide = load <4 x i32>* %A
    658   %narrow = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %wide, i32 1)
    659   %joined = shufflevector <4 x i16> %low, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    660   ret <8 x i16> %joined
    661 }
    662 
    663 define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; rshrn result concatenated after the vector loaded from %ret; output must contain rshrn2.4s
    664 ;CHECK-LABEL: rshrn4s:
    665 ;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
    666   %low = load <2 x i32>* %ret
    667   %wide = load <2 x i64>* %A
    668   %narrow = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %wide, i32 1)
    669   %joined = shufflevector <2 x i32> %low, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    670   ret <4 x i32> %joined
    671 }
    672 
    673 declare <8 x i8>  @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone ; called by @rshrn8b and @rshrn16b
    674 declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone ; called by @rshrn4h and @rshrn8h
    675 declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone ; called by @rshrn2s and @rshrn4s
    676 
    677 define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind { ; plain IR lshr by splat 1 followed by trunc is expected to select shrn with #1
    678 ;CHECK-LABEL: shrn8b:
    679 ;CHECK: shrn.8b v0, {{v[0-9]+}}, #1
    680         %wide = load <8 x i16>* %A
    681         %shifted = lshr <8 x i16> %wide, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    682         %narrow = trunc <8 x i16> %shifted to <8 x i8> ; no intrinsic involved: the shift+truncate pair itself is what must be matched
    683         ret <8 x i8> %narrow
    684 }
    685 
    686 define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind { ; plain IR lshr by splat 1 followed by trunc is expected to select shrn with #1
    687 ;CHECK-LABEL: shrn4h:
    688 ;CHECK: shrn.4h v0, {{v[0-9]+}}, #1
    689         %wide = load <4 x i32>* %A
    690         %shifted = lshr <4 x i32> %wide, <i32 1, i32 1, i32 1, i32 1>
    691         %narrow = trunc <4 x i32> %shifted to <4 x i16> ; no intrinsic involved: the shift+truncate pair itself is what must be matched
    692         ret <4 x i16> %narrow
    693 }
    694 
    695 define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind { ; plain IR lshr by splat 1 followed by trunc is expected to select shrn with #1
    696 ;CHECK-LABEL: shrn2s:
    697 ;CHECK: shrn.2s v0, {{v[0-9]+}}, #1
    698         %wide = load <2 x i64>* %A
    699         %shifted = lshr <2 x i64> %wide, <i64 1, i64 1>
    700         %narrow = trunc <2 x i64> %shifted to <2 x i32> ; no intrinsic involved: the shift+truncate pair itself is what must be matched
    701         ret <2 x i32> %narrow
    702 }
    703 
    704 define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ; lshr+trunc result placed above an existing low half is expected to select shrn2
    705 ;CHECK-LABEL: shrn16b:
    706 ;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1
    707         %lo = load <8 x i8>* %ret
    708         %wide = load <8 x i16>* %A
    709         %shifted = lshr <8 x i16> %wide, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
    710         %hi = trunc <8 x i16> %shifted to <8 x i8>
    711         %full = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask: %lo fills lanes 0-7, %hi lanes 8-15
    712         ret <16 x i8> %full
    713 }
    714 
    715 define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; lshr+trunc result placed above an existing low half is expected to select shrn2
    716 ;CHECK-LABEL: shrn8h:
    717 ;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1
    718         %lo = load <4 x i16>* %ret
    719         %wide = load <4 x i32>* %A
    720         %shifted = lshr <4 x i32> %wide, <i32 1, i32 1, i32 1, i32 1>
    721         %hi = trunc <4 x i32> %shifted to <4 x i16>
    722         %full = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %lo fills lanes 0-3, %hi lanes 4-7
    723         ret <8 x i16> %full
    724 }
    725 
    726 define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; lshr+trunc result placed above an existing low half is expected to select shrn2
    727 ;CHECK-LABEL: shrn4s:
    728 ;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1
    729         %lo = load <2 x i32>* %ret
    730         %wide = load <2 x i64>* %A
    731         %shifted = lshr <2 x i64> %wide, <i64 1, i64 1>
    732         %hi = trunc <2 x i64> %shifted to <2 x i32>
    733         %full = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask: %lo fills lanes 0-1, %hi lanes 2-3
    734         ret <4 x i32> %full
    735 }
    736 
    737 declare <8 x i8>  @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone ; NOTE(review): none of the shrn tests visible above call these; they use lshr + trunc instead - confirm whether these declarations are still needed
    738 declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
    739 declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
    740 
    741 define i32 @sqshrn1s(i64 %A) nounwind { ; scalar i64 -> i32 form of the sqshrn intrinsic, shift 1; expected as sqshrn on an s register from d0
    742 ; CHECK-LABEL: sqshrn1s:
    743 ; CHECK: sqshrn {{s[0-9]+}}, d0, #1
    744   %res = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
    745   ret i32 %res
    746 }
    747 
    748 define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind { ; sqshrn intrinsic with constant shift 1 is expected to select sqshrn.8b with #1
    749 ;CHECK-LABEL: sqshrn8b:
    750 ;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
    751         %wide = load <8 x i16>* %A
    752         %narrow = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %wide, i32 1)
    753         ret <8 x i8> %narrow
    754 }
    755 
    756 define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind { ; sqshrn intrinsic with constant shift 1 is expected to select sqshrn.4h with #1
    757 ;CHECK-LABEL: sqshrn4h:
    758 ;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
    759         %wide = load <4 x i32>* %A
    760         %narrow = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %wide, i32 1)
    761         ret <4 x i16> %narrow
    762 }
    763 
    764 define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind { ; sqshrn intrinsic with constant shift 1 is expected to select sqshrn.2s with #1
    765 ;CHECK-LABEL: sqshrn2s:
    766 ;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
    767         %wide = load <2 x i64>* %A
    768         %narrow = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %wide, i32 1)
    769         ret <2 x i32> %narrow
    770 }
    771 
    772 
    773 define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ; sqshrn result placed above an existing low half is expected to select sqshrn2
    774 ;CHECK-LABEL: sqshrn16b:
    775 ;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
    776         %lo = load <8 x i8>* %ret
    777         %wide = load <8 x i16>* %A
    778         %hi = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %wide, i32 1)
    779         %full = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask: %lo fills lanes 0-7, %hi lanes 8-15
    780         ret <16 x i8> %full
    781 }
    782 
    783 define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; sqshrn result placed above an existing low half is expected to select sqshrn2
    784 ;CHECK-LABEL: sqshrn8h:
    785 ;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
    786         %lo = load <4 x i16>* %ret
    787         %wide = load <4 x i32>* %A
    788         %hi = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %wide, i32 1)
    789         %full = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %lo fills lanes 0-3, %hi lanes 4-7
    790         ret <8 x i16> %full
    791 }
    792 
    793 define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; sqshrn result placed above an existing low half is expected to select sqshrn2
    794 ;CHECK-LABEL: sqshrn4s:
    795 ;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
    796         %lo = load <2 x i32>* %ret
    797         %wide = load <2 x i64>* %A
    798         %hi = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %wide, i32 1)
    799         %full = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask: %lo fills lanes 0-1, %hi lanes 2-3
    800         ret <4 x i32> %full
    801 }
    802 
    803 declare i32  @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone ; sqshrn overloads called by the sqshrn tests above: one scalar i64 -> i32 form plus three vector forms
    804 declare <8 x i8>  @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
    805 declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
    806 declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
    807 
    808 define i32 @sqshrun1s(i64 %A) nounwind { ; scalar i64 -> i32 form of the sqshrun intrinsic, shift 1; expected as sqshrun on an s register from d0
    809 ; CHECK-LABEL: sqshrun1s:
    810 ; CHECK: sqshrun {{s[0-9]+}}, d0, #1
    811   %res = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
    812   ret i32 %res
    813 }
    814 
    815 define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind { ; sqshrun intrinsic with constant shift 1 is expected to select sqshrun.8b with #1
    816 ;CHECK-LABEL: sqshrun8b:
    817 ;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
    818         %wide = load <8 x i16>* %A
    819         %narrow = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %wide, i32 1)
    820         ret <8 x i8> %narrow
    821 }
    822 
    823 define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind { ; sqshrun intrinsic with constant shift 1 is expected to select sqshrun.4h with #1
    824 ;CHECK-LABEL: sqshrun4h:
    825 ;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
    826         %wide = load <4 x i32>* %A
    827         %narrow = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %wide, i32 1)
    828         ret <4 x i16> %narrow
    829 }
    830 
    831 define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind { ; sqshrun intrinsic with constant shift 1 is expected to select sqshrun.2s with #1
    832 ;CHECK-LABEL: sqshrun2s:
    833 ;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
    834         %wide = load <2 x i64>* %A
    835         %narrow = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %wide, i32 1)
    836         ret <2 x i32> %narrow
    837 }
    838 
    839 define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ; sqshrun result placed above an existing low half is expected to select sqshrun2
    840 ;CHECK-LABEL: sqshrun16b:
    841 ;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
    842         %lo = load <8 x i8>* %ret
    843         %wide = load <8 x i16>* %A
    844         %hi = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %wide, i32 1)
    845         %full = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask: %lo fills lanes 0-7, %hi lanes 8-15
    846         ret <16 x i8> %full
    847 }
    848 
    849 define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; sqshrun result placed above an existing low half is expected to select sqshrun2
    850 ;CHECK-LABEL: sqshrun8h:
    851 ;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
    852         %lo = load <4 x i16>* %ret
    853         %wide = load <4 x i32>* %A
    854         %hi = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %wide, i32 1)
    855         %full = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %lo fills lanes 0-3, %hi lanes 4-7
    856         ret <8 x i16> %full
    857 }
    858 
    859 define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; sqshrun result placed above an existing low half is expected to select sqshrun2
    860 ;CHECK-LABEL: sqshrun4s:
    861 ;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
    862         %lo = load <2 x i32>* %ret
    863         %wide = load <2 x i64>* %A
    864         %hi = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %wide, i32 1)
    865         %full = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask: %lo fills lanes 0-1, %hi lanes 2-3
    866         ret <4 x i32> %full
    867 }
    868 
    869 declare i32  @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone ; sqshrun overloads called by the sqshrun tests above: one scalar i64 -> i32 form plus three vector forms
    870 declare <8 x i8>  @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
    871 declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
    872 declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
    873 
    874 define i32 @sqrshrn1s(i64 %A) nounwind { ; scalar i64 -> i32 form of the sqrshrn intrinsic, shift 1; expected as sqrshrn on an s register from d0
    875 ; CHECK-LABEL: sqrshrn1s:
    876 ; CHECK: sqrshrn {{s[0-9]+}}, d0, #1
    877   %res = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
    878   ret i32 %res
    879 }
    880 
    881 define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind { ; sqrshrn intrinsic with constant shift 1 is expected to select sqrshrn.8b with #1
    882 ;CHECK-LABEL: sqrshrn8b:
    883 ;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
    884         %wide = load <8 x i16>* %A
    885         %narrow = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %wide, i32 1)
    886         ret <8 x i8> %narrow
    887 }
    888 
    889 define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind { ; sqrshrn intrinsic with constant shift 1 is expected to select sqrshrn.4h with #1
    890 ;CHECK-LABEL: sqrshrn4h:
    891 ;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
    892         %wide = load <4 x i32>* %A
    893         %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %wide, i32 1)
    894         ret <4 x i16> %narrow
    895 }
    896 
    897 define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind { ; sqrshrn intrinsic with constant shift 1 is expected to select sqrshrn.2s with #1
    898 ;CHECK-LABEL: sqrshrn2s:
    899 ;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
    900         %wide = load <2 x i64>* %A
    901         %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %wide, i32 1)
    902         ret <2 x i32> %narrow
    903 }
    904 
    905 define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ; sqrshrn result placed above an existing low half is expected to select sqrshrn2
    906 ;CHECK-LABEL: sqrshrn16b:
    907 ;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
    908         %lo = load <8 x i8>* %ret
    909         %wide = load <8 x i16>* %A
    910         %hi = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %wide, i32 1)
    911         %full = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask: %lo fills lanes 0-7, %hi lanes 8-15
    912         ret <16 x i8> %full
    913 }
    914 
    915 define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; sqrshrn result placed above an existing low half is expected to select sqrshrn2
    916 ;CHECK-LABEL: sqrshrn8h:
    917 ;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
    918         %lo = load <4 x i16>* %ret
    919         %wide = load <4 x i32>* %A
    920         %hi = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %wide, i32 1)
    921         %full = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %lo fills lanes 0-3, %hi lanes 4-7
    922         ret <8 x i16> %full
    923 }
    924 
    925 define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; sqrshrn result placed above an existing low half is expected to select sqrshrn2
    926 ;CHECK-LABEL: sqrshrn4s:
    927 ;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
    928         %lo = load <2 x i32>* %ret
    929         %wide = load <2 x i64>* %A
    930         %hi = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %wide, i32 1)
    931         %full = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask: %lo fills lanes 0-1, %hi lanes 2-3
    932         ret <4 x i32> %full
    933 }
    934 
    935 declare i32  @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone ; sqrshrn overloads called by the sqrshrn tests above: one scalar i64 -> i32 form plus three vector forms
    936 declare <8 x i8>  @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
    937 declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
    938 declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
    939 
    940 define i32 @sqrshrun1s(i64 %A) nounwind { ; scalar i64 -> i32 form of the sqrshrun intrinsic, shift 1; expected as sqrshrun on an s register from d0
    941 ; CHECK-LABEL: sqrshrun1s:
    942 ; CHECK: sqrshrun {{s[0-9]+}}, d0, #1
    943   %res = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
    944   ret i32 %res
    945 }
    946 
    947 define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind { ; sqrshrun intrinsic with constant shift 1 is expected to select sqrshrun.8b with #1
    948 ;CHECK-LABEL: sqrshrun8b:
    949 ;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
    950         %wide = load <8 x i16>* %A
    951         %narrow = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %wide, i32 1)
    952         ret <8 x i8> %narrow
    953 }
    954 
    955 define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind { ; sqrshrun intrinsic with constant shift 1 is expected to select sqrshrun.4h with #1
    956 ;CHECK-LABEL: sqrshrun4h:
    957 ;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
    958         %wide = load <4 x i32>* %A
    959         %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %wide, i32 1)
    960         ret <4 x i16> %narrow
    961 }
    962 
    963 define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind { ; sqrshrun intrinsic with constant shift 1 is expected to select sqrshrun.2s with #1
    964 ;CHECK-LABEL: sqrshrun2s:
    965 ;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
    966         %wide = load <2 x i64>* %A
    967         %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %wide, i32 1)
    968         ret <2 x i32> %narrow
    969 }
    970 
    971 define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ; sqrshrun result placed above an existing low half is expected to select sqrshrun2
    972 ;CHECK-LABEL: sqrshrun16b:
    973 ;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
    974         %lo = load <8 x i8>* %ret
    975         %wide = load <8 x i16>* %A
    976         %hi = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %wide, i32 1)
    977         %full = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask: %lo fills lanes 0-7, %hi lanes 8-15
    978         ret <16 x i8> %full
    979 }
    980 
    981 define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; sqrshrun result placed above an existing low half is expected to select sqrshrun2
    982 ;CHECK-LABEL: sqrshrun8h:
    983 ;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
    984         %lo = load <4 x i16>* %ret
    985         %wide = load <4 x i32>* %A
    986         %hi = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %wide, i32 1)
    987         %full = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %lo fills lanes 0-3, %hi lanes 4-7
    988         ret <8 x i16> %full
    989 }
    990 
    991 define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; sqrshrun result placed above an existing low half is expected to select sqrshrun2
    992 ;CHECK-LABEL: sqrshrun4s:
    993 ;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
    994         %lo = load <2 x i32>* %ret
    995         %wide = load <2 x i64>* %A
    996         %hi = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %wide, i32 1)
    997         %full = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask: %lo fills lanes 0-1, %hi lanes 2-3
    998         ret <4 x i32> %full
    999 }
   1000 
   1001 declare i32  @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone ; sqrshrun overloads called by the sqrshrun tests above: one scalar i64 -> i32 form plus three vector forms
   1002 declare <8 x i8>  @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
   1003 declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
   1004 declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
   1005 
   1006 define i32 @uqrshrn1s(i64 %A) nounwind { ; scalar i64 -> i32 form of the uqrshrn intrinsic, shift 1; expected as uqrshrn on an s register from d0
   1007 ; CHECK-LABEL: uqrshrn1s:
   1008 ; CHECK: uqrshrn {{s[0-9]+}}, d0, #1
   1009   %res = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
   1010   ret i32 %res
   1011 }
   1012 
   1013 define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind { ; uqrshrn intrinsic with constant shift 1 is expected to select uqrshrn.8b with #1
   1014 ;CHECK-LABEL: uqrshrn8b:
   1015 ;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
   1016         %wide = load <8 x i16>* %A
   1017         %narrow = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %wide, i32 1)
   1018         ret <8 x i8> %narrow
   1019 }
   1020 
   1021 define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind { ; uqrshrn intrinsic with constant shift 1 is expected to select uqrshrn.4h with #1
   1022 ;CHECK-LABEL: uqrshrn4h:
   1023 ;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
   1024         %wide = load <4 x i32>* %A
   1025         %narrow = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %wide, i32 1)
   1026         ret <4 x i16> %narrow
   1027 }
   1028 
   1029 define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind { ; uqrshrn intrinsic with constant shift 1 is expected to select uqrshrn.2s with #1
   1030 ;CHECK-LABEL: uqrshrn2s:
   1031 ;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
   1032         %wide = load <2 x i64>* %A
   1033         %narrow = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %wide, i32 1)
   1034         ret <2 x i32> %narrow
   1035 }
   1036 
   1037 define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ; uqrshrn result placed above an existing low half is expected to select uqrshrn2
   1038 ;CHECK-LABEL: uqrshrn16b:
   1039 ;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
   1040         %lo = load <8 x i8>* %ret
   1041         %wide = load <8 x i16>* %A
   1042         %hi = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %wide, i32 1)
   1043         %full = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask: %lo fills lanes 0-7, %hi lanes 8-15
   1044         ret <16 x i8> %full
   1045 }
   1046 
   1047 define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; uqrshrn result placed above an existing low half is expected to select uqrshrn2
   1048 ;CHECK-LABEL: uqrshrn8h:
   1049 ;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
   1050         %lo = load <4 x i16>* %ret
   1051         %wide = load <4 x i32>* %A
   1052         %hi = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %wide, i32 1)
   1053         %full = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %lo fills lanes 0-3, %hi lanes 4-7
   1054         ret <8 x i16> %full
   1055 }
   1056 
   1057 define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; uqrshrn result placed above an existing low half is expected to select uqrshrn2
   1058 ;CHECK-LABEL: uqrshrn4s:
   1059 ;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
   1060         %lo = load <2 x i32>* %ret
   1061         %wide = load <2 x i64>* %A
   1062         %hi = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %wide, i32 1)
   1063         %full = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask: %lo fills lanes 0-1, %hi lanes 2-3
   1064         ret <4 x i32> %full
   1065 }
   1066 
   1067 declare i32  @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone ; uqrshrn overloads called by the uqrshrn tests above: one scalar i64 -> i32 form plus three vector forms
   1068 declare <8 x i8>  @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
   1069 declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
   1070 declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
   1071 
   1072 define i32 @uqshrn1s(i64 %A) nounwind { ; scalar i64 -> i32 form of the uqshrn intrinsic, shift 1; expected as uqshrn on an s register from d0
   1073 ; CHECK-LABEL: uqshrn1s:
   1074 ; CHECK: uqshrn {{s[0-9]+}}, d0, #1
   1075   %res = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
   1076   ret i32 %res
   1077 }
   1078 
   1079 define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind { ; uqshrn intrinsic with constant shift 1 is expected to select uqshrn.8b with #1
   1080 ;CHECK-LABEL: uqshrn8b:
   1081 ;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
   1082         %wide = load <8 x i16>* %A
   1083         %narrow = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %wide, i32 1)
   1084         ret <8 x i8> %narrow
   1085 }
   1086 
   1087 define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind { ; uqshrn intrinsic with constant shift 1 is expected to select uqshrn.4h with #1
   1088 ;CHECK-LABEL: uqshrn4h:
   1089 ;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
   1090         %wide = load <4 x i32>* %A
   1091         %narrow = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %wide, i32 1)
   1092         ret <4 x i16> %narrow
   1093 }
   1094 
   1095 define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind { ; uqshrn intrinsic with constant shift 1 is expected to select uqshrn.2s with #1
   1096 ;CHECK-LABEL: uqshrn2s:
   1097 ;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
   1098         %wide = load <2 x i64>* %A
   1099         %narrow = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %wide, i32 1)
   1100         ret <2 x i32> %narrow
   1101 }
   1102 
   1103 define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { ; uqshrn result placed above an existing low half is expected to select uqshrn2
   1104 ;CHECK-LABEL: uqshrn16b:
   1105 ;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
   1106         %lo = load <8 x i8>* %ret
   1107         %wide = load <8 x i16>* %A
   1108         %hi = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %wide, i32 1)
   1109         %full = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask: %lo fills lanes 0-7, %hi lanes 8-15
   1110         ret <16 x i8> %full
   1111 }
   1112 
   1113 define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { ; uqshrn result placed above an existing low half is expected to select uqshrn2
   1114 ;CHECK-LABEL: uqshrn8h:
   1115 ;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
   1116   %lo = load <4 x i16>* %ret
   1117   %wide = load <4 x i32>* %A
   1118   %hi = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %wide, i32 1)
   1119   %full = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %lo fills lanes 0-3, %hi lanes 4-7
   1120   ret <8 x i16> %full
   1121 }
   1122 
   1123 define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { ; uqshrn result placed above an existing low half is expected to select uqshrn2
   1124 ;CHECK-LABEL: uqshrn4s:
   1125 ;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
   1126   %lo = load <2 x i32>* %ret
   1127   %wide = load <2 x i64>* %A
   1128   %hi = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %wide, i32 1)
   1129   %full = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask: %lo fills lanes 0-1, %hi lanes 2-3
   1130   ret <4 x i32> %full
   1131 }
   1132 
   1133 declare i32  @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone ; uqshrn overloads called by the uqshrn tests above: one scalar i64 -> i32 form plus three vector forms
   1134 declare <8 x i8>  @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
   1135 declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
   1136 declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
   1137 
   1138 define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind { ; zext to double-width lanes followed by shl by splat 1 is expected to select ushll with #1
   1139 ;CHECK-LABEL: ushll8h:
   1140 ;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
   1141         %src = load <8 x i8>* %A
   1142         %ext = zext <8 x i8> %src to <8 x i16>
   1143         %shifted = shl <8 x i16> %ext, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1144         ret <8 x i16> %shifted
   1145 }
   1146 
   1147 define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind { ; zext to double-width lanes followed by shl by splat 1 is expected to select ushll with #1
   1148 ;CHECK-LABEL: ushll4s:
   1149 ;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
   1150         %src = load <4 x i16>* %A
   1151         %ext = zext <4 x i16> %src to <4 x i32>
   1152         %shifted = shl <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
   1153         ret <4 x i32> %shifted
   1154 }
   1155 
   1156 define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind { ; zext to double-width lanes followed by shl by splat 1 is expected to select ushll with #1
   1157 ;CHECK-LABEL: ushll2d:
   1158 ;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
   1159         %src = load <2 x i32>* %A
   1160         %ext = zext <2 x i32> %src to <2 x i64>
   1161         %shifted = shl <2 x i64> %ext, <i64 1, i64 1>
   1162         ret <2 x i64> %shifted
   1163 }
   1164 
   1165 define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind { ; upper-half extract + zext + shl by splat 1 is expected to select ushll2 with #1
   1166 ;CHECK-LABEL: ushll2_8h:
   1167 ;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
   1168         %vec = load <16 x i8>* %A
   1169         %hi = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; mask picks lanes 8-15 of %vec
   1170         %ext = zext <8 x i8> %hi to <8 x i16>
   1171         %shifted = shl <8 x i16> %ext, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1172         ret <8 x i16> %shifted
   1173 }
   1174 
   1175 define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind { ; upper-half extract + zext + shl by splat 1 is expected to select ushll2 with #1
   1176 ;CHECK-LABEL: ushll2_4s:
   1177 ;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
   1178         %vec = load <8 x i16>* %A
   1179         %hi = shufflevector <8 x i16> %vec, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; mask picks lanes 4-7 of %vec
   1180         %ext = zext <4 x i16> %hi to <4 x i32>
   1181         %shifted = shl <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
   1182         ret <4 x i32> %shifted
   1183 }
   1184 
   1185 define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind { ; upper-half extract + zext + shl by splat 1 is expected to select ushll2 with #1
   1186 ;CHECK-LABEL: ushll2_2d:
   1187 ;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
   1188         %vec = load <4 x i32>* %A
   1189         %hi = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; mask picks lanes 2-3 of %vec
   1190         %ext = zext <2 x i32> %hi to <2 x i64>
   1191         %shifted = shl <2 x i64> %ext, <i64 1, i64 1>
   1192         ret <2 x i64> %shifted
   1193 }
   1194 
   1195 define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind { ; sext to double-width lanes followed by shl by splat 1 is expected to select sshll with #1
   1196 ;CHECK-LABEL: sshll8h:
   1197 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
   1198         %src = load <8 x i8>* %A
   1199         %ext = sext <8 x i8> %src to <8 x i16>
   1200         %shifted = shl <8 x i16> %ext, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1201         ret <8 x i16> %shifted
   1202 }
   1203 
   1204 define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind { ; sext to double-width lanes followed by shl by splat 1 is expected to select sshll with #1
   1205 ;CHECK-LABEL: sshll4s:
   1206 ;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
   1207         %src = load <4 x i16>* %A
   1208         %ext = sext <4 x i16> %src to <4 x i32>
   1209         %shifted = shl <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
   1210         ret <4 x i32> %shifted
   1211 }
   1212 
   1213 define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind { ; sext to double-width lanes followed by shl by splat 1 is expected to select sshll with #1
   1214 ;CHECK-LABEL: sshll2d:
   1215 ;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
   1216         %src = load <2 x i32>* %A
   1217         %ext = sext <2 x i32> %src to <2 x i64>
   1218         %shifted = shl <2 x i64> %ext, <i64 1, i64 1>
   1219         ret <2 x i64> %shifted
   1220 }
   1221 
   1222 define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind { ; upper-half extract + sext + shl by splat 1 is expected to select sshll2 with #1
   1223 ;CHECK-LABEL: sshll2_8h:
   1224 ;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
   1225         %vec = load <16 x i8>* %A
   1226         %hi = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; mask picks lanes 8-15 of %vec
   1227         %ext = sext <8 x i8> %hi to <8 x i16>
   1228         %shifted = shl <8 x i16> %ext, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1229         ret <8 x i16> %shifted
   1230 }
   1231 
   1232 define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind { ; upper-half extract + sext + shl by splat 1 is expected to select sshll2 with #1
   1233 ;CHECK-LABEL: sshll2_4s:
   1234 ;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
   1235         %vec = load <8 x i16>* %A
   1236         %hi = shufflevector <8 x i16> %vec, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; mask picks lanes 4-7 of %vec
   1237         %ext = sext <4 x i16> %hi to <4 x i32>
   1238         %shifted = shl <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
   1239         ret <4 x i32> %shifted
   1240 }
   1241 
   1242 define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind { ; upper-half extract + sext + shl by splat 1 is expected to select sshll2 with #1
   1243 ;CHECK-LABEL: sshll2_2d:
   1244 ;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
   1245         %vec = load <4 x i32>* %A
   1246         %hi = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; mask picks lanes 2-3 of %vec
   1247         %ext = sext <2 x i32> %hi to <2 x i64>
   1248         %shifted = shl <2 x i64> %ext, <i64 1, i64 1>
   1249         ret <2 x i64> %shifted
   1250 }
   1251 
   1252 define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind { ; sqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1253 ;CHECK-LABEL: sqshli8b:
   1254 ;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
   1255         %src = load <8 x i8>* %A
   1256         %res = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %src, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
   1257         ret <8 x i8> %res
   1258 }
   1259 
   1260 define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind { ; sqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1261 ;CHECK-LABEL: sqshli4h:
   1262 ;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
   1263         %src = load <4 x i16>* %A
   1264         %res = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %src, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
   1265         ret <4 x i16> %res
   1266 }
   1267 
   1268 define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind { ; sqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1269 ;CHECK-LABEL: sqshli2s:
   1270 ;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
   1271         %src = load <2 x i32>* %A
   1272         %res = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %src, <2 x i32> <i32 1, i32 1>)
   1273         ret <2 x i32> %res
   1274 }
   1275 
   1276 define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind { ; sqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1277 ;CHECK-LABEL: sqshli16b:
   1278 ;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
   1279         %src = load <16 x i8>* %A
   1280         %res = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
   1281         ret <16 x i8> %res
   1282 }
   1283 
   1284 define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind { ; sqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1285 ;CHECK-LABEL: sqshli8h:
   1286 ;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
   1287         %src = load <8 x i16>* %A
   1288         %res = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   1289         ret <8 x i16> %res
   1290 }
   1291 
   1292 define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind { ; sqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1293 ;CHECK-LABEL: sqshli4s:
   1294 ;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
   1295         %src = load <4 x i32>* %A
   1296         %res = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
   1297         ret <4 x i32> %res
   1298 }
   1299 
   1300 define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind { ; sqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1301 ;CHECK-LABEL: sqshli2d:
   1302 ;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
   1303         %src = load <2 x i64>* %A
   1304         %res = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %src, <2 x i64> <i64 1, i64 1>)
   1305         ret <2 x i64> %res
   1306 }
   1307 
   1308 define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind { ; uqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1309 ;CHECK-LABEL: uqshli8b:
   1310 ;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
   1311         %src = load <8 x i8>* %A
   1312         %res = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %src, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
   1313         ret <8 x i8> %res
   1314 }
   1315 
   1316 define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind { ; splat shift of 8 on i8 lanes: expected to materialise the splat with movi and use the register form of uqshl
   1317 ;CHECK-LABEL: uqshli8b_1:
   1318 ;CHECK: movi.8b [[REG:v[0-9]+]], #0x8
   1319 ;CHECK: uqshl.8b v0, v0, [[REG]]
   1320         %src = load <8 x i8>* %A
   1321         %res = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %src, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>) ; NOTE(review): presumably 8 is outside the immediate range for 8-bit lanes - confirm
   1322         ret <8 x i8> %res
   1323 }
   1324 
   1325 define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind { ; uqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1326 ;CHECK-LABEL: uqshli4h:
   1327 ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
   1328         %src = load <4 x i16>* %A
   1329         %res = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %src, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
   1330         ret <4 x i16> %res
   1331 }
   1332 
   1333 define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind { ; uqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1334 ;CHECK-LABEL: uqshli2s:
   1335 ;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
   1336         %src = load <2 x i32>* %A
   1337         %res = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %src, <2 x i32> <i32 1, i32 1>)
   1338         ret <2 x i32> %res
   1339 }
   1340 
   1341 define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind { ; uqshl intrinsic with a constant splat shift of 1 is expected to select the immediate form (#1)
   1342 ;CHECK-LABEL: uqshli16b:
   1343 ;CHECK: uqshl.16b v0, {{v[0-9]+}}, #1
   1344         %tmp1 = load <16 x i8>* %A
   1345         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) ; operands are now pinned in the expectation so a register-form selection no longer passes
   1346         ret <16 x i8> %tmp3
   1347 }
   1348 
   ; Test: @llvm.aarch64.neon.uqshl.v8i16 on the vector loaded from %A with a
   ; constant splat of 1. Expected output: uqshl.8h writing v0 with immediate #1.
   1349 define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
   1350 ;CHECK-LABEL: uqshli8h:
   1351 ;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
   1352         %tmp1 = load <8 x i16>* %A
   1353         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   1354         ret <8 x i16> %tmp3
   1355 }
   1356 
   ; Test: @llvm.aarch64.neon.uqshl.v4i32 on the vector loaded from %A with a
   ; constant splat of 1. Expected output: uqshl.4s writing v0 with immediate #1.
   1357 define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
   1358 ;CHECK-LABEL: uqshli4s:
   1359 ;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
   1360         %tmp1 = load <4 x i32>* %A
   1361         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
   1362         ret <4 x i32> %tmp3
   1363 }
   1364 
   ; Test: @llvm.aarch64.neon.uqshl.v2i64 on the vector loaded from %A with a
   ; constant splat of 1. Expected output: uqshl.2d writing v0 with immediate #1.
   1365 define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
   1366 ;CHECK-LABEL: uqshli2d:
   1367 ;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
   1368         %tmp1 = load <2 x i64>* %A
   1369         %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
   1370         ret <2 x i64> %tmp3
   1371 }
   1372 
   ; Test: urshl.v8i8 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ursra.8b writing v0 with immediate #1.
   1373 define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   1374 ;CHECK-LABEL: ursra8b:
   1375 ;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
   1376         %tmp1 = load <8 x i8>* %A
   1377         %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
   1378         %tmp4 = load <8 x i8>* %B
   1379         %tmp5 = add <8 x i8> %tmp3, %tmp4
   1380         ret <8 x i8> %tmp5
   1381 }
   1382 
   ; Test: urshl.v4i16 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ursra.4h writing v0 with immediate #1.
   1383 define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   1384 ;CHECK-LABEL: ursra4h:
   1385 ;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
   1386         %tmp1 = load <4 x i16>* %A
   1387         %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
   1388         %tmp4 = load <4 x i16>* %B
   1389         %tmp5 = add <4 x i16> %tmp3, %tmp4
   1390         ret <4 x i16> %tmp5
   1391 }
   1392 
   ; Test: urshl.v2i32 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ursra.2s writing v0 with immediate #1.
   1393 define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   1394 ;CHECK-LABEL: ursra2s:
   1395 ;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
   1396         %tmp1 = load <2 x i32>* %A
   1397         %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
   1398         %tmp4 = load <2 x i32>* %B
   1399         %tmp5 = add <2 x i32> %tmp3, %tmp4
   1400         ret <2 x i32> %tmp5
   1401 }
   1402 
   ; Test: urshl.v16i8 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ursra.16b writing v0 with immediate #1.
   1403 define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   1404 ;CHECK-LABEL: ursra16b:
   1405 ;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
   1406         %tmp1 = load <16 x i8>* %A
   1407         %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
   1408         %tmp4 = load <16 x i8>* %B
   1409         %tmp5 = add <16 x i8> %tmp3, %tmp4
   1410          ret <16 x i8> %tmp5
   1411 }
   1412 
   ; Test: urshl.v8i16 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ursra.8h writing v0 with immediate #1.
   1413 define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
   1414 ;CHECK-LABEL: ursra8h:
   1415 ;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
   1416         %tmp1 = load <8 x i16>* %A
   1417         %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
   1418         %tmp4 = load <8 x i16>* %B
   1419         %tmp5 = add <8 x i16> %tmp3, %tmp4
   1420          ret <8 x i16> %tmp5
   1421 }
   1422 
   ; Test: urshl.v4i32 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ursra.4s writing v0 with immediate #1.
   1423 define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   1424 ;CHECK-LABEL: ursra4s:
   1425 ;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
   1426         %tmp1 = load <4 x i32>* %A
   1427         %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
   1428         %tmp4 = load <4 x i32>* %B
   1429         %tmp5 = add <4 x i32> %tmp3, %tmp4
   1430          ret <4 x i32> %tmp5
   1431 }
   1432 
   ; Test: urshl.v2i64 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ursra.2d writing v0 with immediate #1.
   1433 define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
   1434 ;CHECK-LABEL: ursra2d:
   1435 ;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
   1436         %tmp1 = load <2 x i64>* %A
   1437         %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
   1438         %tmp4 = load <2 x i64>* %B
   1439         %tmp5 = add <2 x i64> %tmp3, %tmp4
   1440          ret <2 x i64> %tmp5
   1441 }
   1442 
   ; Test: srshl.v8i8 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as srsra.8b writing v0 with immediate #1.
   1443 define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   1444 ;CHECK-LABEL: srsra8b:
   1445 ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
   1446         %tmp1 = load <8 x i8>* %A
   1447         %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
   1448         %tmp4 = load <8 x i8>* %B
   1449         %tmp5 = add <8 x i8> %tmp3, %tmp4
   1450         ret <8 x i8> %tmp5
   1451 }
   1452 
   ; Test: srshl.v4i16 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as srsra.4h writing v0 with immediate #1.
   1453 define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   1454 ;CHECK-LABEL: srsra4h:
   1455 ;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
   1456         %tmp1 = load <4 x i16>* %A
   1457         %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
   1458         %tmp4 = load <4 x i16>* %B
   1459         %tmp5 = add <4 x i16> %tmp3, %tmp4
   1460         ret <4 x i16> %tmp5
   1461 }
   1462 
   ; Test: srshl.v2i32 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as srsra.2s writing v0 with immediate #1.
   1463 define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   1464 ;CHECK-LABEL: srsra2s:
   1465 ;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
   1466         %tmp1 = load <2 x i32>* %A
   1467         %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
   1468         %tmp4 = load <2 x i32>* %B
   1469         %tmp5 = add <2 x i32> %tmp3, %tmp4
   1470         ret <2 x i32> %tmp5
   1471 }
   1472 
   ; Test: srshl.v16i8 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as srsra.16b writing v0 with immediate #1.
   1473 define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   1474 ;CHECK-LABEL: srsra16b:
   1475 ;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
   1476         %tmp1 = load <16 x i8>* %A
   1477         %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
   1478         %tmp4 = load <16 x i8>* %B
   1479         %tmp5 = add <16 x i8> %tmp3, %tmp4
   1480          ret <16 x i8> %tmp5
   1481 }
   1482 
   ; Test: srshl.v8i16 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as srsra.8h writing v0 with immediate #1.
   1483 define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
   1484 ;CHECK-LABEL: srsra8h:
   1485 ;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
   1486         %tmp1 = load <8 x i16>* %A
   1487         %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
   1488         %tmp4 = load <8 x i16>* %B
   1489         %tmp5 = add <8 x i16> %tmp3, %tmp4
   1490          ret <8 x i16> %tmp5
   1491 }
   1492 
   ; Test: srshl.v4i32 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as srsra.4s writing v0 with immediate #1.
   1493 define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   1494 ;CHECK-LABEL: srsra4s:
   1495 ;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
   1496         %tmp1 = load <4 x i32>* %A
   1497         %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
   1498         %tmp4 = load <4 x i32>* %B
   1499         %tmp5 = add <4 x i32> %tmp3, %tmp4
   1500          ret <4 x i32> %tmp5
   1501 }
   1502 
   ; Test: srshl.v2i64 of the vector loaded from %A by a constant splat of -1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as srsra.2d writing v0 with immediate #1.
   1503 define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
   1504 ;CHECK-LABEL: srsra2d:
   1505 ;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
   1506         %tmp1 = load <2 x i64>* %A
   1507         %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
   1508         %tmp4 = load <2 x i64>* %B
   1509         %tmp5 = add <2 x i64> %tmp3, %tmp4
   1510          ret <2 x i64> %tmp5
   1511 }
   1512 
   ; Test: lshr of the <8 x i8> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as usra.8b writing v0 with immediate #1.
   1513 define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   1514 ;CHECK-LABEL: usra8b:
   1515 ;CHECK: usra.8b v0, {{v[0-9]+}}, #1
   1516         %tmp1 = load <8 x i8>* %A
   1517         %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1518         %tmp4 = load <8 x i8>* %B
   1519         %tmp5 = add <8 x i8> %tmp3, %tmp4
   1520         ret <8 x i8> %tmp5
   1521 }
   1522 
   ; Test: lshr of the <4 x i16> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as usra.4h writing v0 with immediate #1.
   1523 define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   1524 ;CHECK-LABEL: usra4h:
   1525 ;CHECK: usra.4h v0, {{v[0-9]+}}, #1
   1526         %tmp1 = load <4 x i16>* %A
   1527         %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
   1528         %tmp4 = load <4 x i16>* %B
   1529         %tmp5 = add <4 x i16> %tmp3, %tmp4
   1530         ret <4 x i16> %tmp5
   1531 }
   1532 
   ; Test: lshr of the <2 x i32> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as usra.2s writing v0 with immediate #1.
   1533 define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   1534 ;CHECK-LABEL: usra2s:
   1535 ;CHECK: usra.2s v0, {{v[0-9]+}}, #1
   1536         %tmp1 = load <2 x i32>* %A
   1537         %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
   1538         %tmp4 = load <2 x i32>* %B
   1539         %tmp5 = add <2 x i32> %tmp3, %tmp4
   1540         ret <2 x i32> %tmp5
   1541 }
   1542 
   ; Test: lshr of the <16 x i8> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as usra.16b writing v0 with immediate #1.
   1543 define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   1544 ;CHECK-LABEL: usra16b:
   1545 ;CHECK: usra.16b v0, {{v[0-9]+}}, #1
   1546         %tmp1 = load <16 x i8>* %A
   1547         %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1548         %tmp4 = load <16 x i8>* %B
   1549         %tmp5 = add <16 x i8> %tmp3, %tmp4
   1550          ret <16 x i8> %tmp5
   1551 }
   1552 
   ; Test: lshr of the <8 x i16> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as usra.8h writing v0 with immediate #1.
   1553 define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
   1554 ;CHECK-LABEL: usra8h:
   1555 ;CHECK: usra.8h v0, {{v[0-9]+}}, #1
   1556         %tmp1 = load <8 x i16>* %A
   1557         %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1558         %tmp4 = load <8 x i16>* %B
   1559         %tmp5 = add <8 x i16> %tmp3, %tmp4
   1560          ret <8 x i16> %tmp5
   1561 }
   1562 
   ; Test: lshr of the <4 x i32> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as usra.4s writing v0 with immediate #1.
   1563 define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   1564 ;CHECK-LABEL: usra4s:
   1565 ;CHECK: usra.4s v0, {{v[0-9]+}}, #1
   1566         %tmp1 = load <4 x i32>* %A
   1567         %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
   1568         %tmp4 = load <4 x i32>* %B
   1569         %tmp5 = add <4 x i32> %tmp3, %tmp4
   1570          ret <4 x i32> %tmp5
   1571 }
   1572 
   ; Test: lshr of the <2 x i64> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as usra.2d writing v0 with immediate #1.
   1573 define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
   1574 ;CHECK-LABEL: usra2d:
   1575 ;CHECK: usra.2d v0, {{v[0-9]+}}, #1
   1576         %tmp1 = load <2 x i64>* %A
   1577         %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
   1578         %tmp4 = load <2 x i64>* %B
   1579         %tmp5 = add <2 x i64> %tmp3, %tmp4
   1580          ret <2 x i64> %tmp5
   1581 }
   1582 
   ; Test: ashr of the <8 x i8> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ssra.8b writing v0 with immediate #1.
   1583 define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   1584 ;CHECK-LABEL: ssra8b:
   1585 ;CHECK: ssra.8b v0, {{v[0-9]+}}, #1
   1586         %tmp1 = load <8 x i8>* %A
   1587         %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1588         %tmp4 = load <8 x i8>* %B
   1589         %tmp5 = add <8 x i8> %tmp3, %tmp4
   1590         ret <8 x i8> %tmp5
   1591 }
   1592 
   ; Test: ashr of the <4 x i16> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ssra.4h writing v0 with immediate #1.
   1593 define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   1594 ;CHECK-LABEL: ssra4h:
   1595 ;CHECK: ssra.4h v0, {{v[0-9]+}}, #1
   1596         %tmp1 = load <4 x i16>* %A
   1597         %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
   1598         %tmp4 = load <4 x i16>* %B
   1599         %tmp5 = add <4 x i16> %tmp3, %tmp4
   1600         ret <4 x i16> %tmp5
   1601 }
   1602 
   ; Test: ashr of the <2 x i32> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ssra.2s writing v0 with immediate #1.
   1603 define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   1604 ;CHECK-LABEL: ssra2s:
   1605 ;CHECK: ssra.2s v0, {{v[0-9]+}}, #1
   1606         %tmp1 = load <2 x i32>* %A
   1607         %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
   1608         %tmp4 = load <2 x i32>* %B
   1609         %tmp5 = add <2 x i32> %tmp3, %tmp4
   1610         ret <2 x i32> %tmp5
   1611 }
   1612 
   ; Test: ashr of the <16 x i8> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ssra.16b writing v0 with immediate #1.
   1613 define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   1614 ;CHECK-LABEL: ssra16b:
   1615 ;CHECK: ssra.16b v0, {{v[0-9]+}}, #1
   1616         %tmp1 = load <16 x i8>* %A
   1617         %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1618         %tmp4 = load <16 x i8>* %B
   1619         %tmp5 = add <16 x i8> %tmp3, %tmp4
   1620          ret <16 x i8> %tmp5
   1621 }
   1622 
   ; Test: ashr of the <8 x i16> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ssra.8h writing v0 with immediate #1.
   1623 define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
   1624 ;CHECK-LABEL: ssra8h:
   1625 ;CHECK: ssra.8h v0, {{v[0-9]+}}, #1
   1626         %tmp1 = load <8 x i16>* %A
   1627         %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1628         %tmp4 = load <8 x i16>* %B
   1629         %tmp5 = add <8 x i16> %tmp3, %tmp4
   1630          ret <8 x i16> %tmp5
   1631 }
   1632 
   ; Test: ashr of the <4 x i32> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ssra.4s writing v0 with immediate #1.
   1633 define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   1634 ;CHECK-LABEL: ssra4s:
   1635 ;CHECK: ssra.4s v0, {{v[0-9]+}}, #1
   1636         %tmp1 = load <4 x i32>* %A
   1637         %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
   1638         %tmp4 = load <4 x i32>* %B
   1639         %tmp5 = add <4 x i32> %tmp3, %tmp4
   1640          ret <4 x i32> %tmp5
   1641 }
   1642 
   ; Test: ashr of the <2 x i64> vector loaded from %A by a constant splat of 1,
   ; added to the vector loaded from %B. The test expects the pair to be
   ; selected as ssra.2d writing v0 with immediate #1.
   1643 define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
   1644 ;CHECK-LABEL: ssra2d:
   1645 ;CHECK: ssra.2d v0, {{v[0-9]+}}, #1
   1646         %tmp1 = load <2 x i64>* %A
   1647         %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
   1648         %tmp4 = load <2 x i64>* %B
   1649         %tmp5 = add <2 x i64> %tmp3, %tmp4
   1650          ret <2 x i64> %tmp5
   1651 }
   1652 
   ; Test: lshr of the <8 x i8> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: a shift matching
   ; "shr.8b" with #1 into v0, then orr.8b, then ret, on consecutive lines,
   ; i.e. the shift and the or stay as two separate instructions.
   1653 define <8 x i8> @shr_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   1654 ;CHECK-LABEL: shr_orr8b:
   1655 ;CHECK: shr.8b v0, {{v[0-9]+}}, #1
   1656 ;CHECK-NEXT: orr.8b
   1657 ;CHECK-NEXT: ret
   1658         %tmp1 = load <8 x i8>* %A
   1659         %tmp4 = load <8 x i8>* %B
   1660         %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1661         %tmp5 = or <8 x i8> %tmp3, %tmp4
   1662         ret <8 x i8> %tmp5
   1663 }
   1664 
   ; Test: lshr of the <4 x i16> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: a shift matching
   ; "shr.4h" with #1 into v0, then orr.8b, then ret, on consecutive lines,
   ; i.e. the shift and the or stay as two separate instructions.
   1665 define <4 x i16> @shr_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   1666 ;CHECK-LABEL: shr_orr4h:
   1667 ;CHECK: shr.4h v0, {{v[0-9]+}}, #1
   1668 ;CHECK-NEXT: orr.8b
   1669 ;CHECK-NEXT: ret
   1670         %tmp1 = load <4 x i16>* %A
   1671         %tmp4 = load <4 x i16>* %B
   1672         %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
   1673         %tmp5 = or <4 x i16> %tmp3, %tmp4
   1674         ret <4 x i16> %tmp5
   1675 }
   1676 
   ; Test: lshr of the <2 x i32> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: a shift matching
   ; "shr.2s" with #1 into v0, then orr.8b, then ret, on consecutive lines,
   ; i.e. the shift and the or stay as two separate instructions.
   1677 define <2 x i32> @shr_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   1678 ;CHECK-LABEL: shr_orr2s:
   1679 ;CHECK: shr.2s v0, {{v[0-9]+}}, #1
   1680 ;CHECK-NEXT: orr.8b
   1681 ;CHECK-NEXT: ret
   1682         %tmp1 = load <2 x i32>* %A
   1683         %tmp4 = load <2 x i32>* %B
   1684         %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
   1685         %tmp5 = or <2 x i32> %tmp3, %tmp4
   1686         ret <2 x i32> %tmp5
   1687 }
   1688 
   ; Test: lshr of the <16 x i8> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: a shift matching
   ; "shr.16b" with #1 into v0, then orr.16b, then ret, on consecutive lines,
   ; i.e. the shift and the or stay as two separate instructions.
   1689 define <16 x i8> @shr_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   1690 ;CHECK-LABEL: shr_orr16b:
   1691 ;CHECK: shr.16b v0, {{v[0-9]+}}, #1
   1692 ;CHECK-NEXT: orr.16b
   1693 ;CHECK-NEXT: ret
   1694         %tmp1 = load <16 x i8>* %A
   1695         %tmp4 = load <16 x i8>* %B
   1696         %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1697         %tmp5 = or <16 x i8> %tmp3, %tmp4
   1698          ret <16 x i8> %tmp5
   1699 }
   1700 
   ; Test: lshr of the <8 x i16> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: a shift matching
   ; "shr.8h" with #1 into v0, then orr.16b, then ret, on consecutive lines,
   ; i.e. the shift and the or stay as two separate instructions.
   1701 define <8 x i16> @shr_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
   1702 ;CHECK-LABEL: shr_orr8h:
   1703 ;CHECK: shr.8h v0, {{v[0-9]+}}, #1
   1704 ;CHECK-NEXT: orr.16b
   1705 ;CHECK-NEXT: ret
   1706         %tmp1 = load <8 x i16>* %A
   1707         %tmp4 = load <8 x i16>* %B
   1708         %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1709         %tmp5 = or <8 x i16> %tmp3, %tmp4
   1710          ret <8 x i16> %tmp5
   1711 }
   1712 
   ; Test: lshr of the <4 x i32> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: a shift matching
   ; "shr.4s" with #1 into v0, then orr.16b, then ret, on consecutive lines,
   ; i.e. the shift and the or stay as two separate instructions.
   1713 define <4 x i32> @shr_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   1714 ;CHECK-LABEL: shr_orr4s:
   1715 ;CHECK: shr.4s v0, {{v[0-9]+}}, #1
   1716 ;CHECK-NEXT: orr.16b
   1717 ;CHECK-NEXT: ret
   1718         %tmp1 = load <4 x i32>* %A
   1719         %tmp4 = load <4 x i32>* %B
   1720         %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
   1721         %tmp5 = or <4 x i32> %tmp3, %tmp4
   1722          ret <4 x i32> %tmp5
   1723 }
   1724 
   ; Test: lshr of the <2 x i64> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: a shift matching
   ; "shr.2d" with #1 into v0, then orr.16b, then ret, on consecutive lines,
   ; i.e. the shift and the or stay as two separate instructions.
   1725 define <2 x i64> @shr_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
   1726 ;CHECK-LABEL: shr_orr2d:
   1727 ;CHECK: shr.2d v0, {{v[0-9]+}}, #1
   1728 ;CHECK-NEXT: orr.16b
   1729 ;CHECK-NEXT: ret
   1730         %tmp1 = load <2 x i64>* %A
   1731         %tmp4 = load <2 x i64>* %B
   1732         %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
   1733         %tmp5 = or <2 x i64> %tmp3, %tmp4
   1734          ret <2 x i64> %tmp5
   1735 }
   1736 
   ; Test: shl of the <8 x i8> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: shl.8b with #1 into
   ; v0, then orr.8b, then ret, on consecutive lines, i.e. the shift and the or
   ; stay as two separate instructions.
   1737 define <8 x i8> @shl_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   1738 ;CHECK-LABEL: shl_orr8b:
   1739 ;CHECK: shl.8b v0, {{v[0-9]+}}, #1
   1740 ;CHECK-NEXT: orr.8b
   1741 ;CHECK-NEXT: ret
   1742         %tmp1 = load <8 x i8>* %A
   1743         %tmp4 = load <8 x i8>* %B
   1744         %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1745         %tmp5 = or <8 x i8> %tmp3, %tmp4
   1746         ret <8 x i8> %tmp5
   1747 }
   1748 
   ; Test: shl of the <4 x i16> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: shl.4h with #1 into
   ; v0, then orr.8b, then ret, on consecutive lines, i.e. the shift and the or
   ; stay as two separate instructions.
   1749 define <4 x i16> @shl_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   1750 ;CHECK-LABEL: shl_orr4h:
   1751 ;CHECK: shl.4h v0, {{v[0-9]+}}, #1
   1752 ;CHECK-NEXT: orr.8b
   1753 ;CHECK-NEXT: ret
   1754         %tmp1 = load <4 x i16>* %A
   1755         %tmp4 = load <4 x i16>* %B
   1756         %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
   1757         %tmp5 = or <4 x i16> %tmp3, %tmp4
   1758         ret <4 x i16> %tmp5
   1759 }
   1760 
   ; Test: shl of the <2 x i32> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: shl.2s with #1 into
   ; v0, then orr.8b, then ret, on consecutive lines, i.e. the shift and the or
   ; stay as two separate instructions.
   1761 define <2 x i32> @shl_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   1762 ;CHECK-LABEL: shl_orr2s:
   1763 ;CHECK: shl.2s v0, {{v[0-9]+}}, #1
   1764 ;CHECK-NEXT: orr.8b
   1765 ;CHECK-NEXT: ret
   1766         %tmp1 = load <2 x i32>* %A
   1767         %tmp4 = load <2 x i32>* %B
   1768         %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
   1769         %tmp5 = or <2 x i32> %tmp3, %tmp4
   1770         ret <2 x i32> %tmp5
   1771 }
   1772 
   ; Test: shl of the <16 x i8> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: shl.16b with #1 into
   ; v0, then orr.16b, then ret, on consecutive lines, i.e. the shift and the or
   ; stay as two separate instructions.
   1773 define <16 x i8> @shl_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   1774 ;CHECK-LABEL: shl_orr16b:
   1775 ;CHECK: shl.16b v0, {{v[0-9]+}}, #1
   1776 ;CHECK-NEXT: orr.16b
   1777 ;CHECK-NEXT: ret
   1778         %tmp1 = load <16 x i8>* %A
   1779         %tmp4 = load <16 x i8>* %B
   1780         %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   1781         %tmp5 = or <16 x i8> %tmp3, %tmp4
   1782          ret <16 x i8> %tmp5
   1783 }
   1784 
   ; Test: shl of the <8 x i16> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: shl.8h with #1 into
   ; v0, then orr.16b, then ret, on consecutive lines, i.e. the shift and the or
   ; stay as two separate instructions.
   1785 define <8 x i16> @shl_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
   1786 ;CHECK-LABEL: shl_orr8h:
   1787 ;CHECK: shl.8h v0, {{v[0-9]+}}, #1
   1788 ;CHECK-NEXT: orr.16b
   1789 ;CHECK-NEXT: ret
   1790         %tmp1 = load <8 x i16>* %A
   1791         %tmp4 = load <8 x i16>* %B
   1792         %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   1793         %tmp5 = or <8 x i16> %tmp3, %tmp4
   1794          ret <8 x i16> %tmp5
   1795 }
   1796 
   ; Test: shl of the <4 x i32> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: shl.4s with #1 into
   ; v0, then orr.16b, then ret, on consecutive lines, i.e. the shift and the or
   ; stay as two separate instructions.
   1797 define <4 x i32> @shl_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   1798 ;CHECK-LABEL: shl_orr4s:
   1799 ;CHECK: shl.4s v0, {{v[0-9]+}}, #1
   1800 ;CHECK-NEXT: orr.16b
   1801 ;CHECK-NEXT: ret
   1802         %tmp1 = load <4 x i32>* %A
   1803         %tmp4 = load <4 x i32>* %B
   1804         %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
   1805         %tmp5 = or <4 x i32> %tmp3, %tmp4
   1806          ret <4 x i32> %tmp5
   1807 }
   1808 
   ; Test: shl of the <2 x i64> vector loaded from %A by a constant splat of 1,
   ; or-ed with the vector loaded from %B. Expected output: shl.2d with #1 into
   ; v0, then orr.16b, then ret, on consecutive lines, i.e. the shift and the or
   ; stay as two separate instructions.
   1809 define <2 x i64> @shl_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
   1810 ;CHECK-LABEL: shl_orr2d:
   1811 ;CHECK: shl.2d v0, {{v[0-9]+}}, #1
   1812 ;CHECK-NEXT: orr.16b
   1813 ;CHECK-NEXT: ret
   1814         %tmp1 = load <2 x i64>* %A
   1815         %tmp4 = load <2 x i64>* %B
   1816         %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
   1817         %tmp5 = or <2 x i64> %tmp3, %tmp4
   1818          ret <2 x i64> %tmp5
   1819 }
   1820 
   ; Test: zero-extends the <8 x i8> argument to <8 x i16> and shifts left by a
   ; splat of 8 (the source lane width). Expected output: shll.8h into v0 with
   ; immediate #8.
   1821 define <8 x i16> @shll(<8 x i8> %in) {
   1822 ; CHECK-LABEL: shll:
   1823 ; CHECK: shll.8h v0, {{v[0-9]+}}, #8
   1824   %ext = zext <8 x i8> %in to <8 x i16>
   1825   %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   1826   ret <8 x i16> %res
   1827 }
   1828 
   ; Test: extracts elements 4..7 of the <8 x i16> argument with shufflevector,
   ; zero-extends them to <4 x i32> and shifts left by a splat of 16 (the source
   ; lane width). Expected output: shll2.4s into v0 with immediate #16.
   ; The label pattern ends with ':' so it matches only the function's label
   ; line rather than any line containing the function name.
   1829 define <4 x i32> @shll_high(<8 x i16> %in) {
   1830 ; CHECK-LABEL: shll_high:
   1831 ; CHECK: shll2.4s v0, {{v[0-9]+}}, #16
   1832   %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1833   %ext = zext <4 x i16> %extract to <4 x i32>
   1834   %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
   1835   ret <4 x i32> %res
   1836 }
   1837 
   ; Test: @llvm.aarch64.neon.vsli.v8i8 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: sli.8b into v0 with #1.
   1838 define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
   1839 ;CHECK-LABEL: sli8b:
   1840 ;CHECK: sli.8b v0, {{v[0-9]+}}, #1
   1841         %tmp1 = load <8 x i8>* %A
   1842         %tmp2 = load <8 x i8>* %B
   1843         %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
   1844         ret <8 x i8> %tmp3
   1845 }
   1846 
   ; Test: @llvm.aarch64.neon.vsli.v4i16 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: sli.4h into v0 with #1.
   1847 define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
   1848 ;CHECK-LABEL: sli4h:
   1849 ;CHECK: sli.4h v0, {{v[0-9]+}}, #1
   1850         %tmp1 = load <4 x i16>* %A
   1851         %tmp2 = load <4 x i16>* %B
   1852         %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
   1853         ret <4 x i16> %tmp3
   1854 }
   1855 
   ; Test: @llvm.aarch64.neon.vsli.v2i32 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: sli.2s into v0 with #1.
   1856 define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
   1857 ;CHECK-LABEL: sli2s:
   1858 ;CHECK: sli.2s v0, {{v[0-9]+}}, #1
   1859         %tmp1 = load <2 x i32>* %A
   1860         %tmp2 = load <2 x i32>* %B
   1861         %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
   1862         ret <2 x i32> %tmp3
   1863 }
   1864 
   ; Test: @llvm.aarch64.neon.vsli.v1i64 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: the scalar d-register
   ; form, sli into d0 with #1.
   1865 define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
   1866 ;CHECK-LABEL: sli1d:
   1867 ;CHECK: sli d0, {{d[0-9]+}}, #1
   1868         %tmp1 = load <1 x i64>* %A
   1869         %tmp2 = load <1 x i64>* %B
   1870         %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
   1871         ret <1 x i64> %tmp3
   1872 }
   1873 
   ; Test: @llvm.aarch64.neon.vsli.v16i8 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: sli.16b into v0 with #1.
   1874 define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
   1875 ;CHECK-LABEL: sli16b:
   1876 ;CHECK: sli.16b v0, {{v[0-9]+}}, #1
   1877         %tmp1 = load <16 x i8>* %A
   1878         %tmp2 = load <16 x i8>* %B
   1879         %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
   1880         ret <16 x i8> %tmp3
   1881 }
   1882 
   ; Test: @llvm.aarch64.neon.vsli.v8i16 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: sli.8h into v0 with #1.
   1883 define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
   1884 ;CHECK-LABEL: sli8h:
   1885 ;CHECK: sli.8h v0, {{v[0-9]+}}, #1
   1886         %tmp1 = load <8 x i16>* %A
   1887         %tmp2 = load <8 x i16>* %B
   1888         %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
   1889         ret <8 x i16> %tmp3
   1890 }
   1891 
   ; Test: @llvm.aarch64.neon.vsli.v4i32 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: sli.4s into v0 with #1.
   1892 define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   1893 ;CHECK-LABEL: sli4s:
   1894 ;CHECK: sli.4s v0, {{v[0-9]+}}, #1
   1895         %tmp1 = load <4 x i32>* %A
   1896         %tmp2 = load <4 x i32>* %B
   1897         %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
   1898         ret <4 x i32> %tmp3
   1899 }
   1900 
   ; Test: @llvm.aarch64.neon.vsli.v2i64 on the vectors loaded from %A and %B
   ; with an i32 shift operand of 1. Expected output: sli.2d into v0 with #1.
   1901 define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
   1902 ;CHECK-LABEL: sli2d:
   1903 ;CHECK: sli.2d v0, {{v[0-9]+}}, #1
   1904         %tmp1 = load <2 x i64>* %A
   1905         %tmp2 = load <2 x i64>* %B
   1906         %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
   1907         ret <2 x i64> %tmp3
   1908 }
   1909 
   ; Declarations of the vsli intrinsics called by the sli* tests in this file:
   ; two vector operands of the result type plus an i32 operand.
   ; First group: 64-bit vector types.
   1910 declare <8 x i8>  @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
   1911 declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
   1912 declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
   1913 declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
   1914 
   ; Second group: 128-bit vector types.
   1915 declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
   1916 declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
   1917 declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
   1918 declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
   1919 
   ; Test: ashr of a <1 x i64> argument by a variable <1 x i64> shift amount.
   ; Expected output: the shift amount is negated with a d-register neg and the
   ; shift is then done with a d-register sshl.
   1920 define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
   1921 ; CHECK-LABEL: ashr_v1i64:
   1922 ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
   1923 ; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
   1924   %c = ashr <1 x i64> %a, %b
   1925   ret <1 x i64> %c
   1926 }
   1927