; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Tests for the ARM NEON saturating shift-left instructions:
;   vqshl.sN / vqshl.uN   - register and immediate forms (vqshifts/vqshiftu)
;   vqshlu.sN             - signed-input, unsigned-saturate immediate form (vqshiftsu)
;   vqrshl.sN / vqrshl.uN - rounding register form (vqrshifts/vqrshiftu)
; CHECK-LABEL is used on each function name so FileCheck scopes every
; mnemonic check to the body of the function that should emit it.

define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqshls8:
;CHECK: vqshl.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqshls16:
;CHECK: vqshl.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqshls32:
;CHECK: vqshl.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqshls64:
;CHECK: vqshl.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqshlu8:
;CHECK: vqshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqshlu16:
;CHECK: vqshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqshlu32:
;CHECK: vqshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqshlu64:
;CHECK: vqshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqshlQs8:
;CHECK: vqshl.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqshlQs16:
;CHECK: vqshl.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqshlQs32:
;CHECK: vqshl.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqshlQs64:
;CHECK: vqshl.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqshlQu8:
;CHECK: vqshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqshlQu16:
;CHECK: vqshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqshlQu32:
;CHECK: vqshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqshlQu64:
;CHECK: vqshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

; Immediate forms: a constant splat shift amount of (element width - 1)
; should select the immediate encoding of vqshl.

define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqshls_n8:
;CHECK: vqshl.s8{{.*#7}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqshls_n16:
;CHECK: vqshl.s16{{.*#15}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqshls_n32:
;CHECK: vqshl.s32{{.*#31}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
	ret <2 x i32> %tmp2
}

define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vqshls_n64:
;CHECK: vqshl.s64{{.*#63}}
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
	ret <1 x i64> %tmp2
}

define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlu_n8:
;CHECK: vqshl.u8{{.*#7}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlu_n16:
;CHECK: vqshl.u16{{.*#15}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlu_n32:
;CHECK: vqshl.u32{{.*#31}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
	ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlu_n64:
;CHECK: vqshl.u64{{.*#63}}
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
	ret <1 x i64> %tmp2
}

define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n8:
;CHECK: vqshlu.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n16:
;CHECK: vqshlu.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n32:
;CHECK: vqshlu.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
	ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n64:
;CHECK: vqshlu.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
	ret <1 x i64> %tmp2
}

define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n8:
;CHECK: vqshl.s8{{.*#7}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n16:
;CHECK: vqshl.s16{{.*#15}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n32:
;CHECK: vqshl.s32{{.*#31}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
	ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n64:
;CHECK: vqshl.s64{{.*#63}}
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
	ret <2 x i64> %tmp2
}

define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n8:
;CHECK: vqshl.u8{{.*#7}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n16:
;CHECK: vqshl.u16{{.*#15}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n32:
;CHECK: vqshl.u32{{.*#31}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
	ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n64:
;CHECK: vqshl.u64{{.*#63}}
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
	ret <2 x i64> %tmp2
}

define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n8:
;CHECK: vqshlu.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n16:
;CHECK: vqshlu.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n32:
;CHECK: vqshlu.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
	ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n64:
;CHECK: vqshlu.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
	ret <2 x i64> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshls8:
;CHECK: vqrshl.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshls16:
;CHECK: vqrshl.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshls32:
;CHECK: vqrshl.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshls64:
;CHECK: vqrshl.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshlu8:
;CHECK: vqrshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshlu16:
;CHECK: vqrshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshlu32:
;CHECK: vqrshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshlu64:
;CHECK: vqrshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshlQs8:
;CHECK: vqrshl.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshlQs16:
;CHECK: vqrshl.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshlQs32:
;CHECK: vqrshl.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshlQs64:
;CHECK: vqrshl.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshlQu8:
;CHECK: vqrshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshlQu16:
;CHECK: vqrshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshlQu32:
;CHECK: vqrshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshlQu64:
;CHECK: vqrshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
; Declarations for the 128-bit (Q-register) saturating rounding shift intrinsics.
declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone