; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

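; Tests that the NEON saturating vector-shift intrinsics (vqshifts, vqshiftu,
; vqshiftsu, and their rounding counterparts) are selected to the
; corresponding vqshl/vqshlu/vqrshl instructions, for both register and
; immediate shift counts and for both D- and Q-register operand widths.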
define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqshls8:
;CHECK: vqshl.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqshls16:
;CHECK: vqshl.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqshls32:
;CHECK: vqshl.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqshls64:
;CHECK: vqshl.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqshlu8:
;CHECK: vqshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqshlu16:
;CHECK: vqshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqshlu32:
;CHECK: vqshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqshlu64:
;CHECK: vqshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

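; Q-register (128-bit) versions of the register-shift tests above.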
define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqshlQs8:
;CHECK: vqshl.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqshlQs16:
;CHECK: vqshl.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqshlQs32:
;CHECK: vqshl.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqshlQs64:
;CHECK: vqshl.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqshlQu8:
;CHECK: vqshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqshlQu16:
;CHECK: vqshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqshlQu32:
;CHECK: vqshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqshlQu64:
;CHECK: vqshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

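; Saturating shift left by immediate. The shift counts are constant splats of
; the widest legal immediate for each element size (#7, #15, #31, #63), so
; llc should select the immediate form of vqshl.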
define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshls_n8:
;CHECK: vqshl.s8{{.*#7}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshls_n16:
;CHECK: vqshl.s16{{.*#15}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshls_n32:
;CHECK: vqshl.s32{{.*#31}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
	ret <2 x i32> %tmp2
}

define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshls_n64:
;CHECK: vqshl.s64{{.*#63}}
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
	ret <1 x i64> %tmp2
}

define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshlu_n8:
;CHECK: vqshl.u8{{.*#7}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshlu_n16:
;CHECK: vqshl.u16{{.*#15}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshlu_n32:
;CHECK: vqshl.u32{{.*#31}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
	ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshlu_n64:
;CHECK: vqshl.u64{{.*#63}}
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
	ret <1 x i64> %tmp2
}

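; vqshlu: the signed-input, unsigned-saturation variant (the vqshiftsu
; intrinsics), where the result is saturated to the unsigned range of the
; element type.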
define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshlsu_n8:
;CHECK: vqshlu.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshlsu_n16:
;CHECK: vqshlu.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshlsu_n32:
;CHECK: vqshlu.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
	ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshlsu_n64:
;CHECK: vqshlu.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
	ret <1 x i64> %tmp2
}

define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQs_n8:
;CHECK: vqshl.s8{{.*#7}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQs_n16:
;CHECK: vqshl.s16{{.*#15}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQs_n32:
;CHECK: vqshl.s32{{.*#31}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
	ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQs_n64:
;CHECK: vqshl.s64{{.*#63}}
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
	ret <2 x i64> %tmp2
}

define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQu_n8:
;CHECK: vqshl.u8{{.*#7}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQu_n16:
;CHECK: vqshl.u16{{.*#15}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQu_n32:
;CHECK: vqshl.u32{{.*#31}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
	ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQu_n64:
;CHECK: vqshl.u64{{.*#63}}
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
	ret <2 x i64> %tmp2
}

define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQsu_n8:
;CHECK: vqshlu.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQsu_n16:
;CHECK: vqshlu.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQsu_n32:
;CHECK: vqshlu.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
	ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQsu_n64:
;CHECK: vqshlu.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
	ret <2 x i64> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

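; vqrshl: the saturating rounding shift left, register form, selected from
; the vqrshifts/vqrshiftu intrinsics.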
define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqrshls8:
;CHECK: vqrshl.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqrshls16:
;CHECK: vqrshl.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqrshls32:
;CHECK: vqrshl.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqrshls64:
;CHECK: vqrshl.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqrshlu8:
;CHECK: vqrshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqrshlu16:
;CHECK: vqrshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqrshlu32:
;CHECK: vqrshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqrshlu64:
;CHECK: vqrshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqrshlQs8:
;CHECK: vqrshl.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqrshlQs16:
;CHECK: vqrshl.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqrshlQs32:
;CHECK: vqrshl.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqrshlQs64:
;CHECK: vqrshl.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqrshlQu8:
;CHECK: vqrshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqrshlQu16:
;CHECK: vqrshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqrshlQu32:
;CHECK: vqrshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqrshlQu64:
;CHECK: vqrshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone