; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s

; Every function below builds one vector-shift operation (an
; @llvm.aarch64.neon.* intrinsic call or a plain IR sequence); the FileCheck
; patterns inside each function name the mnemonic expected in llc's output.

; sqshl / uqshl intrinsics with both operands loaded from memory.

define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqshl8b:
;CHECK: sqshl.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqshl4h:
;CHECK: sqshl.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqshl2s:
;CHECK: sqshl.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqshl8b:
;CHECK: uqshl.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqshl4h:
;CHECK: uqshl.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqshl2s:
;CHECK: uqshl.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqshl16b:
;CHECK: sqshl.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqshl8h:
;CHECK: sqshl.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqshl4s:
;CHECK: sqshl.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqshl2d:
;CHECK: sqshl.2d
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqshl16b:
;CHECK: uqshl.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqshl8h:
;CHECK: uqshl.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqshl4s:
;CHECK: uqshl.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqshl2d:
;CHECK: uqshl.2d
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; srshl / urshl intrinsics with both operands loaded from memory.

define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srshl8b:
;CHECK: srshl.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: srshl4h:
;CHECK: srshl.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: srshl2s:
;CHECK: srshl.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: urshl8b:
;CHECK: urshl.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: urshl4h:
;CHECK: urshl.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: urshl2s:
;CHECK: urshl.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srshl16b:
;CHECK: srshl.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: srshl8h:
;CHECK: srshl.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: srshl4s:
;CHECK: srshl.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: srshl2d:
;CHECK: srshl.2d
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: urshl16b:
;CHECK: urshl.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: urshl8h:
;CHECK: urshl.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: urshl4s:
;CHECK: urshl.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: urshl2d:
;CHECK: urshl.2d
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; sqrshl / uqrshl intrinsics with both operands loaded from memory.

define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqrshl8b:
;CHECK: sqrshl.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrshl4h:
;CHECK: sqrshl.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrshl2s:
;CHECK: sqrshl.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqrshl8b:
;CHECK: uqrshl.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqrshl4h:
;CHECK: uqrshl.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqrshl2s:
;CHECK: uqrshl.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqrshl16b:
;CHECK: sqrshl.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrshl8h:
;CHECK: sqrshl.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrshl4s:
;CHECK: sqrshl.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqrshl2d:
;CHECK: sqrshl.2d
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqrshl16b:
;CHECK: uqrshl.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqrshl8h:
;CHECK: uqrshl.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqrshl4s:
;CHECK: uqrshl.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqrshl2d:
;CHECK: uqrshl.2d
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; urshl / srshl called with a constant splat of -1 as the second operand; the
; expected mnemonic is the urshr / srshr form rather than urshl / srshl.

define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: urshr8b:
;CHECK: urshr.8b
  %tmp1 = load <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: urshr4h:
;CHECK: urshr.4h
  %tmp1 = load <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: urshr2s:
;CHECK: urshr.2s
  %tmp1 = load <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: urshr16b:
;CHECK: urshr.16b
  %tmp1 = load <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: urshr8h:
;CHECK: urshr.8h
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: urshr4s:
;CHECK: urshr.4s
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: urshr2d:
;CHECK: urshr.2d
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %tmp3
}

define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: srshr8b:
;CHECK: srshr.8b
  %tmp1 = load <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: srshr4h:
;CHECK: srshr.4h
  %tmp1 = load <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: srshr2s:
;CHECK: srshr.2s
  %tmp1 = load <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: srshr16b:
;CHECK: srshr.16b
  %tmp1 = load <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: srshr8h:
;CHECK: srshr.8h
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: srshr4s:
;CHECK: srshr.4s
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: srshr2d:
;CHECK: srshr.2d
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %tmp3
}

; sqshlu called with a constant splat of 1; the expected output is the
; immediate form writing v0 with #1.

define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqshlu8b:
;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqshlu4h:
;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqshlu2s:
;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqshlu16b:
;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshlu8h:
;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshlu4s:
;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshlu2d:
;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; rshrn with an i32 1 second argument. In the 16b/8h/4s variants the intrinsic
; result is placed after a loaded vector via shufflevector, and the rshrn2
; form is expected.

define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: rshrn8b:
;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: rshrn4h:
;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: rshrn2s:
;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: rshrn16b:
;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>* %ret
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: rshrn8h:
;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>* %ret
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: rshrn4s:
;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>* %ret
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare <8 x i8>  @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone

; No intrinsic call in the shrn tests: lshr by a splat of 1 followed by trunc
; is expected to come out as shrn (or shrn2 when the result is placed after a
; loaded vector via shufflevector).

define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: shrn8b:
;CHECK: shrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>* %A
  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
  ret <8 x i8> %tmp3
}

define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: shrn4h:
;CHECK: shrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
  ret <4 x i16> %tmp3
}

define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: shrn2s:
;CHECK: shrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>* %A
  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
  ret <2 x i32> %tmp3
}

define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: shrn16b:
;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>* %ret
  %tmp1 = load <8 x i16>* %A
  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: shrn8h:
;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>* %ret
  %tmp1 = load <4 x i32>* %A
  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: shrn4s:
;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>* %ret
  %tmp1 = load <2 x i64>* %A
  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

; NOTE(review): no function visible in this part of the file calls these
; shrn declarations - confirm whether they are still needed.
declare <8 x i8>  @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone

; sqshrn with an i32 1 second argument: one scalar (i64 -> i32) test, the
; vector forms, and the sqshrn2 forms built with shufflevector.

define i32 @sqshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrn1s:
; CHECK: sqshrn {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrn8b:
;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrn4h:
;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrn2s:
;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}


define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrn16b:
;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>* %ret
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrn8h:
;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>* %ret
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrn4s:
;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>* %ret
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone

; sqshrun with an i32 1 second argument, same layout as the sqshrn tests.

define i32 @sqshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrun1s:
; CHECK: sqshrun {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrun8b:
;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun4h:
;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun2s:
;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrun16b:
;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>* %ret
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun8h:
;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>* %ret
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun4s:
;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>* %ret
  %tmp1 = load <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone

; sqrshrn with an i32 1 second argument.

define i32 @sqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrn1s:
; CHECK: sqrshrn {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrn8b:
;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrn4h:
;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) 894 ret <4 x i16> %tmp3 895 } 896 897 define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind { 898 ;CHECK-LABEL: sqrshrn2s: 899 ;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1 900 %tmp1 = load <2 x i64>* %A 901 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) 902 ret <2 x i32> %tmp3 903 } 904 905 define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { 906 ;CHECK-LABEL: sqrshrn16b: 907 ;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1 908 %out = load <8 x i8>* %ret 909 %tmp1 = load <8 x i16>* %A 910 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) 911 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 912 ret <16 x i8> %tmp4 913 } 914 915 define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { 916 ;CHECK-LABEL: sqrshrn8h: 917 ;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1 918 %out = load <4 x i16>* %ret 919 %tmp1 = load <4 x i32>* %A 920 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) 921 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 922 ret <8 x i16> %tmp4 923 } 924 925 define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { 926 ;CHECK-LABEL: sqrshrn4s: 927 ;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1 928 %out = load <2 x i32>* %ret 929 %tmp1 = load <2 x i64>* %A 930 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) 931 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 932 ret <4 x i32> %tmp4 933 } 934 935 declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone 936 declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone 937 declare <4 x i16> 
@llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone 938 declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone 939 940 define i32 @sqrshrun1s(i64 %A) nounwind { 941 ; CHECK-LABEL: sqrshrun1s: 942 ; CHECK: sqrshrun {{s[0-9]+}}, d0, #1 943 %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1) 944 ret i32 %tmp 945 } 946 947 define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind { 948 ;CHECK-LABEL: sqrshrun8b: 949 ;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1 950 %tmp1 = load <8 x i16>* %A 951 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) 952 ret <8 x i8> %tmp3 953 } 954 955 define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind { 956 ;CHECK-LABEL: sqrshrun4h: 957 ;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1 958 %tmp1 = load <4 x i32>* %A 959 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) 960 ret <4 x i16> %tmp3 961 } 962 963 define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind { 964 ;CHECK-LABEL: sqrshrun2s: 965 ;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1 966 %tmp1 = load <2 x i64>* %A 967 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) 968 ret <2 x i32> %tmp3 969 } 970 971 define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { 972 ;CHECK-LABEL: sqrshrun16b: 973 ;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1 974 %out = load <8 x i8>* %ret 975 %tmp1 = load <8 x i16>* %A 976 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) 977 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 978 ret <16 x i8> %tmp4 979 } 980 981 define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { 982 ;CHECK-LABEL: sqrshrun8h: 983 ;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1 984 %out = load <4 x i16>* %ret 985 %tmp1 = load <4 x i32>* %A 986 %tmp3 = call <4 x i16> 
@llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) 987 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 988 ret <8 x i16> %tmp4 989 } 990 991 define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { 992 ;CHECK-LABEL: sqrshrun4s: 993 ;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1 994 %out = load <2 x i32>* %ret 995 %tmp1 = load <2 x i64>* %A 996 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) 997 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 998 ret <4 x i32> %tmp4 999 } 1000 1001 declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone 1002 declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone 1003 declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone 1004 declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone 1005 1006 define i32 @uqrshrn1s(i64 %A) nounwind { 1007 ; CHECK-LABEL: uqrshrn1s: 1008 ; CHECK: uqrshrn {{s[0-9]+}}, d0, #1 1009 %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1) 1010 ret i32 %tmp 1011 } 1012 1013 define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind { 1014 ;CHECK-LABEL: uqrshrn8b: 1015 ;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1 1016 %tmp1 = load <8 x i16>* %A 1017 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) 1018 ret <8 x i8> %tmp3 1019 } 1020 1021 define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind { 1022 ;CHECK-LABEL: uqrshrn4h: 1023 ;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1 1024 %tmp1 = load <4 x i32>* %A 1025 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) 1026 ret <4 x i16> %tmp3 1027 } 1028 1029 define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind { 1030 ;CHECK-LABEL: uqrshrn2s: 1031 ;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1 1032 %tmp1 = load <2 x i64>* %A 1033 %tmp3 = call <2 x i32> 
@llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) 1034 ret <2 x i32> %tmp3 1035 } 1036 1037 define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { 1038 ;CHECK-LABEL: uqrshrn16b: 1039 ;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1 1040 %out = load <8 x i8>* %ret 1041 %tmp1 = load <8 x i16>* %A 1042 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) 1043 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1044 ret <16 x i8> %tmp4 1045 } 1046 1047 define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { 1048 ;CHECK-LABEL: uqrshrn8h: 1049 ;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1 1050 %out = load <4 x i16>* %ret 1051 %tmp1 = load <4 x i32>* %A 1052 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) 1053 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1054 ret <8 x i16> %tmp4 1055 } 1056 1057 define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { 1058 ;CHECK-LABEL: uqrshrn4s: 1059 ;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1 1060 %out = load <2 x i32>* %ret 1061 %tmp1 = load <2 x i64>* %A 1062 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) 1063 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1064 ret <4 x i32> %tmp4 1065 } 1066 1067 declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone 1068 declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone 1069 declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone 1070 declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone 1071 1072 define i32 @uqshrn1s(i64 %A) nounwind { 1073 ; CHECK-LABEL: uqshrn1s: 1074 ; CHECK: uqshrn {{s[0-9]+}}, d0, #1 1075 %tmp = 
call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1) 1076 ret i32 %tmp 1077 } 1078 1079 define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind { 1080 ;CHECK-LABEL: uqshrn8b: 1081 ;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1 1082 %tmp1 = load <8 x i16>* %A 1083 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) 1084 ret <8 x i8> %tmp3 1085 } 1086 1087 define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind { 1088 ;CHECK-LABEL: uqshrn4h: 1089 ;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1 1090 %tmp1 = load <4 x i32>* %A 1091 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) 1092 ret <4 x i16> %tmp3 1093 } 1094 1095 define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind { 1096 ;CHECK-LABEL: uqshrn2s: 1097 ;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1 1098 %tmp1 = load <2 x i64>* %A 1099 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) 1100 ret <2 x i32> %tmp3 1101 } 1102 1103 define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { 1104 ;CHECK-LABEL: uqshrn16b: 1105 ;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1 1106 %out = load <8 x i8>* %ret 1107 %tmp1 = load <8 x i16>* %A 1108 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) 1109 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1110 ret <16 x i8> %tmp4 1111 } 1112 1113 define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { 1114 ;CHECK-LABEL: uqshrn8h: 1115 ;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1 1116 %out = load <4 x i16>* %ret 1117 %tmp1 = load <4 x i32>* %A 1118 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) 1119 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1120 ret <8 x i16> %tmp4 1121 } 1122 1123 define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { 1124 
;CHECK-LABEL: uqshrn4s: 1125 ;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1 1126 %out = load <2 x i32>* %ret 1127 %tmp1 = load <2 x i64>* %A 1128 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) 1129 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1130 ret <4 x i32> %tmp4 1131 } 1132 1133 declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone 1134 declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone 1135 declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone 1136 declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone 1137 1138 define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind { 1139 ;CHECK-LABEL: ushll8h: 1140 ;CHECK: ushll.8h v0, {{v[0-9]+}}, #1 1141 %tmp1 = load <8 x i8>* %A 1142 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> 1143 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1144 ret <8 x i16> %tmp3 1145 } 1146 1147 define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind { 1148 ;CHECK-LABEL: ushll4s: 1149 ;CHECK: ushll.4s v0, {{v[0-9]+}}, #1 1150 %tmp1 = load <4 x i16>* %A 1151 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> 1152 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1> 1153 ret <4 x i32> %tmp3 1154 } 1155 1156 define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind { 1157 ;CHECK-LABEL: ushll2d: 1158 ;CHECK: ushll.2d v0, {{v[0-9]+}}, #1 1159 %tmp1 = load <2 x i32>* %A 1160 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> 1161 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1> 1162 ret <2 x i64> %tmp3 1163 } 1164 1165 define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind { 1166 ;CHECK-LABEL: ushll2_8h: 1167 ;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1 1168 %load1 = load <16 x i8>* %A 1169 %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1170 %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> 1171 %tmp3 = shl <8 x i16> 
%tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1172 ret <8 x i16> %tmp3 1173 } 1174 1175 define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind { 1176 ;CHECK-LABEL: ushll2_4s: 1177 ;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1 1178 %load1 = load <8 x i16>* %A 1179 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1180 %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> 1181 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1> 1182 ret <4 x i32> %tmp3 1183 } 1184 1185 define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind { 1186 ;CHECK-LABEL: ushll2_2d: 1187 ;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1 1188 %load1 = load <4 x i32>* %A 1189 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1190 %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> 1191 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1> 1192 ret <2 x i64> %tmp3 1193 } 1194 1195 define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind { 1196 ;CHECK-LABEL: sshll8h: 1197 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1 1198 %tmp1 = load <8 x i8>* %A 1199 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> 1200 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1201 ret <8 x i16> %tmp3 1202 } 1203 1204 define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind { 1205 ;CHECK-LABEL: sshll4s: 1206 ;CHECK: sshll.4s v0, {{v[0-9]+}}, #1 1207 %tmp1 = load <4 x i16>* %A 1208 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> 1209 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1> 1210 ret <4 x i32> %tmp3 1211 } 1212 1213 define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind { 1214 ;CHECK-LABEL: sshll2d: 1215 ;CHECK: sshll.2d v0, {{v[0-9]+}}, #1 1216 %tmp1 = load <2 x i32>* %A 1217 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> 1218 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1> 1219 ret <2 x i64> %tmp3 1220 } 1221 1222 define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind { 1223 ;CHECK-LABEL: sshll2_8h: 1224 ;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1 1225 %load1 = load <16 x i8>* %A 1226 
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1227 %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> 1228 %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1229 ret <8 x i16> %tmp3 1230 } 1231 1232 define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind { 1233 ;CHECK-LABEL: sshll2_4s: 1234 ;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1 1235 %load1 = load <8 x i16>* %A 1236 %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1237 %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> 1238 %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1> 1239 ret <4 x i32> %tmp3 1240 } 1241 1242 define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind { 1243 ;CHECK-LABEL: sshll2_2d: 1244 ;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1 1245 %load1 = load <4 x i32>* %A 1246 %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1247 %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> 1248 %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1> 1249 ret <2 x i64> %tmp3 1250 } 1251 1252 define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind { 1253 ;CHECK-LABEL: sqshli8b: 1254 ;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1 1255 %tmp1 = load <8 x i8>* %A 1256 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 1257 ret <8 x i8> %tmp3 1258 } 1259 1260 define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind { 1261 ;CHECK-LABEL: sqshli4h: 1262 ;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1 1263 %tmp1 = load <4 x i16>* %A 1264 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 1265 ret <4 x i16> %tmp3 1266 } 1267 1268 define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind { 1269 ;CHECK-LABEL: sqshli2s: 1270 ;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1 1271 %tmp1 = load <2 x i32>* %A 1272 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, 
i32 1>) 1273 ret <2 x i32> %tmp3 1274 } 1275 1276 define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind { 1277 ;CHECK-LABEL: sqshli16b: 1278 ;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1 1279 %tmp1 = load <16 x i8>* %A 1280 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 1281 ret <16 x i8> %tmp3 1282 } 1283 1284 define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind { 1285 ;CHECK-LABEL: sqshli8h: 1286 ;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1 1287 %tmp1 = load <8 x i16>* %A 1288 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) 1289 ret <8 x i16> %tmp3 1290 } 1291 1292 define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind { 1293 ;CHECK-LABEL: sqshli4s: 1294 ;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1 1295 %tmp1 = load <4 x i32>* %A 1296 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>) 1297 ret <4 x i32> %tmp3 1298 } 1299 1300 define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind { 1301 ;CHECK-LABEL: sqshli2d: 1302 ;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1 1303 %tmp1 = load <2 x i64>* %A 1304 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>) 1305 ret <2 x i64> %tmp3 1306 } 1307 1308 define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind { 1309 ;CHECK-LABEL: uqshli8b: 1310 ;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1 1311 %tmp1 = load <8 x i8>* %A 1312 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 1313 ret <8 x i8> %tmp3 1314 } 1315 1316 define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind { 1317 ;CHECK-LABEL: uqshli8b_1: 1318 ;CHECK: movi.8b [[REG:v[0-9]+]], #0x8 1319 ;CHECK: uqshl.8b v0, v0, [[REG]] 1320 %tmp1 = load <8 x i8>* %A 1321 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 
8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>) 1322 ret <8 x i8> %tmp3 1323 }

; uqshl intrinsic called with a constant splat shift of 1 on <4 x i16>.
; The CHECK line requires the immediate form (#1) writing v0.
define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: uqshli4h:
;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

; Same pattern as above for <2 x i32>.
define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: uqshli2s:
;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

; Same pattern for <16 x i8>.
; The CHECK line spells out the full operand list, like the other tests in
; this group: matching only the mnemonic would also accept the
; register-operand form of uqshl, so the immediate selection would go
; untested for this type.
define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli16b:
;CHECK: uqshl.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

; Same pattern for <8 x i16>.
define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uqshli8h:
;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

; Same pattern for <4 x i32>.
define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uqshli4s:
;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind { 1366 ;CHECK-LABEL: uqshli2d: 1367 ;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1 1368 %tmp1 = load <2 x i64>* %A 1369 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>) 1370 ret <2 x i64> 
%tmp3 1371 } 1372 1373 define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1374 ;CHECK-LABEL: ursra8b: 1375 ;CHECK: ursra.8b v0, {{v[0-9]+}}, #1 1376 %tmp1 = load <8 x i8>* %A 1377 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 1378 %tmp4 = load <8 x i8>* %B 1379 %tmp5 = add <8 x i8> %tmp3, %tmp4 1380 ret <8 x i8> %tmp5 1381 } 1382 1383 define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1384 ;CHECK-LABEL: ursra4h: 1385 ;CHECK: ursra.4h v0, {{v[0-9]+}}, #1 1386 %tmp1 = load <4 x i16>* %A 1387 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 1388 %tmp4 = load <4 x i16>* %B 1389 %tmp5 = add <4 x i16> %tmp3, %tmp4 1390 ret <4 x i16> %tmp5 1391 } 1392 1393 define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1394 ;CHECK-LABEL: ursra2s: 1395 ;CHECK: ursra.2s v0, {{v[0-9]+}}, #1 1396 %tmp1 = load <2 x i32>* %A 1397 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>) 1398 %tmp4 = load <2 x i32>* %B 1399 %tmp5 = add <2 x i32> %tmp3, %tmp4 1400 ret <2 x i32> %tmp5 1401 } 1402 1403 define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1404 ;CHECK-LABEL: ursra16b: 1405 ;CHECK: ursra.16b v0, {{v[0-9]+}}, #1 1406 %tmp1 = load <16 x i8>* %A 1407 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 1408 %tmp4 = load <16 x i8>* %B 1409 %tmp5 = add <16 x i8> %tmp3, %tmp4 1410 ret <16 x i8> %tmp5 1411 } 1412 1413 define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1414 ;CHECK-LABEL: ursra8h: 1415 ;CHECK: ursra.8h v0, {{v[0-9]+}}, #1 1416 %tmp1 = load <8 x i16>* %A 1417 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 
-1, i16 -1, i16 -1, i16 -1, i16 -1>) 1418 %tmp4 = load <8 x i16>* %B 1419 %tmp5 = add <8 x i16> %tmp3, %tmp4 1420 ret <8 x i16> %tmp5 1421 } 1422 1423 define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1424 ;CHECK-LABEL: ursra4s: 1425 ;CHECK: ursra.4s v0, {{v[0-9]+}}, #1 1426 %tmp1 = load <4 x i32>* %A 1427 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 1428 %tmp4 = load <4 x i32>* %B 1429 %tmp5 = add <4 x i32> %tmp3, %tmp4 1430 ret <4 x i32> %tmp5 1431 } 1432 1433 define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1434 ;CHECK-LABEL: ursra2d: 1435 ;CHECK: ursra.2d v0, {{v[0-9]+}}, #1 1436 %tmp1 = load <2 x i64>* %A 1437 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>) 1438 %tmp4 = load <2 x i64>* %B 1439 %tmp5 = add <2 x i64> %tmp3, %tmp4 1440 ret <2 x i64> %tmp5 1441 } 1442 1443 define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1444 ;CHECK-LABEL: srsra8b: 1445 ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1 1446 %tmp1 = load <8 x i8>* %A 1447 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 1448 %tmp4 = load <8 x i8>* %B 1449 %tmp5 = add <8 x i8> %tmp3, %tmp4 1450 ret <8 x i8> %tmp5 1451 } 1452 1453 define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1454 ;CHECK-LABEL: srsra4h: 1455 ;CHECK: srsra.4h v0, {{v[0-9]+}}, #1 1456 %tmp1 = load <4 x i16>* %A 1457 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 1458 %tmp4 = load <4 x i16>* %B 1459 %tmp5 = add <4 x i16> %tmp3, %tmp4 1460 ret <4 x i16> %tmp5 1461 } 1462 1463 define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1464 ;CHECK-LABEL: srsra2s: 1465 ;CHECK: srsra.2s v0, {{v[0-9]+}}, #1 1466 %tmp1 = load <2 x i32>* %A 1467 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> 
%tmp1, <2 x i32> <i32 -1, i32 -1>) 1468 %tmp4 = load <2 x i32>* %B 1469 %tmp5 = add <2 x i32> %tmp3, %tmp4 1470 ret <2 x i32> %tmp5 1471 } 1472 1473 define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1474 ;CHECK-LABEL: srsra16b: 1475 ;CHECK: srsra.16b v0, {{v[0-9]+}}, #1 1476 %tmp1 = load <16 x i8>* %A 1477 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 1478 %tmp4 = load <16 x i8>* %B 1479 %tmp5 = add <16 x i8> %tmp3, %tmp4 1480 ret <16 x i8> %tmp5 1481 } 1482 1483 define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1484 ;CHECK-LABEL: srsra8h: 1485 ;CHECK: srsra.8h v0, {{v[0-9]+}}, #1 1486 %tmp1 = load <8 x i16>* %A 1487 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 1488 %tmp4 = load <8 x i16>* %B 1489 %tmp5 = add <8 x i16> %tmp3, %tmp4 1490 ret <8 x i16> %tmp5 1491 } 1492 1493 define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1494 ;CHECK-LABEL: srsra4s: 1495 ;CHECK: srsra.4s v0, {{v[0-9]+}}, #1 1496 %tmp1 = load <4 x i32>* %A 1497 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 1498 %tmp4 = load <4 x i32>* %B 1499 %tmp5 = add <4 x i32> %tmp3, %tmp4 1500 ret <4 x i32> %tmp5 1501 } 1502 1503 define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1504 ;CHECK-LABEL: srsra2d: 1505 ;CHECK: srsra.2d v0, {{v[0-9]+}}, #1 1506 %tmp1 = load <2 x i64>* %A 1507 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>) 1508 %tmp4 = load <2 x i64>* %B 1509 %tmp5 = add <2 x i64> %tmp3, %tmp4 1510 ret <2 x i64> %tmp5 1511 } 1512 1513 define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1514 ;CHECK-LABEL: usra8b: 1515 ;CHECK: usra.8b v0, {{v[0-9]+}}, #1 1516 
%tmp1 = load <8 x i8>* %A 1517 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1518 %tmp4 = load <8 x i8>* %B 1519 %tmp5 = add <8 x i8> %tmp3, %tmp4 1520 ret <8 x i8> %tmp5 1521 } 1522 1523 define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1524 ;CHECK-LABEL: usra4h: 1525 ;CHECK: usra.4h v0, {{v[0-9]+}}, #1 1526 %tmp1 = load <4 x i16>* %A 1527 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1> 1528 %tmp4 = load <4 x i16>* %B 1529 %tmp5 = add <4 x i16> %tmp3, %tmp4 1530 ret <4 x i16> %tmp5 1531 } 1532 1533 define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1534 ;CHECK-LABEL: usra2s: 1535 ;CHECK: usra.2s v0, {{v[0-9]+}}, #1 1536 %tmp1 = load <2 x i32>* %A 1537 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1> 1538 %tmp4 = load <2 x i32>* %B 1539 %tmp5 = add <2 x i32> %tmp3, %tmp4 1540 ret <2 x i32> %tmp5 1541 } 1542 1543 define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1544 ;CHECK-LABEL: usra16b: 1545 ;CHECK: usra.16b v0, {{v[0-9]+}}, #1 1546 %tmp1 = load <16 x i8>* %A 1547 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1548 %tmp4 = load <16 x i8>* %B 1549 %tmp5 = add <16 x i8> %tmp3, %tmp4 1550 ret <16 x i8> %tmp5 1551 } 1552 1553 define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1554 ;CHECK-LABEL: usra8h: 1555 ;CHECK: usra.8h v0, {{v[0-9]+}}, #1 1556 %tmp1 = load <8 x i16>* %A 1557 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1558 %tmp4 = load <8 x i16>* %B 1559 %tmp5 = add <8 x i16> %tmp3, %tmp4 1560 ret <8 x i16> %tmp5 1561 } 1562 1563 define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1564 ;CHECK-LABEL: usra4s: 1565 ;CHECK: usra.4s v0, {{v[0-9]+}}, #1 1566 %tmp1 = load <4 x i32>* %A 1567 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> 1568 %tmp4 = load <4 x i32>* %B 1569 %tmp5 = add <4 x i32> %tmp3, %tmp4 1570 ret <4 x 
i32> %tmp5 1571 } 1572 1573 define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1574 ;CHECK-LABEL: usra2d: 1575 ;CHECK: usra.2d v0, {{v[0-9]+}}, #1 1576 %tmp1 = load <2 x i64>* %A 1577 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1> 1578 %tmp4 = load <2 x i64>* %B 1579 %tmp5 = add <2 x i64> %tmp3, %tmp4 1580 ret <2 x i64> %tmp5 1581 } 1582 1583 define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1584 ;CHECK-LABEL: ssra8b: 1585 ;CHECK: ssra.8b v0, {{v[0-9]+}}, #1 1586 %tmp1 = load <8 x i8>* %A 1587 %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1588 %tmp4 = load <8 x i8>* %B 1589 %tmp5 = add <8 x i8> %tmp3, %tmp4 1590 ret <8 x i8> %tmp5 1591 } 1592 1593 define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1594 ;CHECK-LABEL: ssra4h: 1595 ;CHECK: ssra.4h v0, {{v[0-9]+}}, #1 1596 %tmp1 = load <4 x i16>* %A 1597 %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1> 1598 %tmp4 = load <4 x i16>* %B 1599 %tmp5 = add <4 x i16> %tmp3, %tmp4 1600 ret <4 x i16> %tmp5 1601 } 1602 1603 define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1604 ;CHECK-LABEL: ssra2s: 1605 ;CHECK: ssra.2s v0, {{v[0-9]+}}, #1 1606 %tmp1 = load <2 x i32>* %A 1607 %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1> 1608 %tmp4 = load <2 x i32>* %B 1609 %tmp5 = add <2 x i32> %tmp3, %tmp4 1610 ret <2 x i32> %tmp5 1611 } 1612 1613 define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1614 ;CHECK-LABEL: ssra16b: 1615 ;CHECK: ssra.16b v0, {{v[0-9]+}}, #1 1616 %tmp1 = load <16 x i8>* %A 1617 %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1618 %tmp4 = load <16 x i8>* %B 1619 %tmp5 = add <16 x i8> %tmp3, %tmp4 1620 ret <16 x i8> %tmp5 1621 } 1622 1623 define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1624 ;CHECK-LABEL: ssra8h: 1625 ;CHECK: ssra.8h v0, {{v[0-9]+}}, #1 1626 %tmp1 = load <8 x i16>* %A 1627 %tmp3 = ashr <8 x i16> 
%tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1628 %tmp4 = load <8 x i16>* %B 1629 %tmp5 = add <8 x i16> %tmp3, %tmp4 1630 ret <8 x i16> %tmp5 1631 }

; ashr by a splat of 1 followed by an add of a second loaded vector;
; the CHECK line expects a single ssra with immediate #1 writing v0.
define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ssra4s:
;CHECK: ssra.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>* %A
  %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp4 = load <4 x i32>* %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; Same pattern as above for <2 x i64>.
define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: ssra2d:
;CHECK: ssra.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>* %A
  %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp4 = load <2 x i64>* %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; shr_orr*: lshr by a splat of 1 followed by an 'or' with a second loaded
; vector. The CHECK / CHECK-NEXT sequence requires the shift, then an orr,
; then ret on consecutive lines.
; The shift is matched as 'ushr' (not the bare substring 'shr') because
; FileCheck matches substrings: 'shr.8b' would also accept 'sshr.8b', which
; would be the wrong instruction for a logical shift right.
; NOTE(review): presumably these guard against the shift+or being folded
; into a different instruction - confirm against the commit that added them.
define <8 x i8> @shr_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: shr_orr8b:
;CHECK: ushr.8b v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
  %tmp1 = load <8 x i8>* %A
  %tmp4 = load <8 x i8>* %B
  %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; <4 x i16> variant; the orr is still checked as orr.8b.
define <4 x i16> @shr_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: shr_orr4h:
;CHECK: ushr.4h v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
  %tmp1 = load <4 x i16>* %A
  %tmp4 = load <4 x i16>* %B
  %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; <2 x i32> variant; the orr is still checked as orr.8b.
define <2 x i32> @shr_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: shr_orr2s:
;CHECK: ushr.2s v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
  %tmp1 = load <2 x i32>* %A
  %tmp4 = load <2 x i32>* %B
  %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp5 = or <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; <16 x i8> variant; the orr is checked as orr.16b.
define <16 x i8> @shr_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: shr_orr16b:
;CHECK: ushr.16b v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
  %tmp1 = load <16 x i8>* %A
  %tmp4 = load <16 x i8>* %B
  %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; <8 x i16> variant; the orr is checked as orr.16b.
define <8 x i16> @shr_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: shr_orr8h:
;CHECK: ushr.8h v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
  %tmp1 = load <8 x i16>* %A
  %tmp4 = load <8 x i16>* %B
  %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; <4 x i32> variant; the orr is checked as orr.16b.
define <4 x i32> @shr_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: shr_orr4s:
;CHECK: ushr.4s v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
  %tmp1 = load <4 x i32>* %A
  %tmp4 = load <4 x i32>* %B
  %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp5 = or <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; <2 x i64> variant; the orr is checked as orr.16b.
define <2 x i64> @shr_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: shr_orr2d:
;CHECK: ushr.2d v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
  %tmp1 = load <2 x i64>* %A
  %tmp4 = load <2 x i64>* %B
  %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp5 = or <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <8 x i8> @shl_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1738 ;CHECK-LABEL: shl_orr8b: 1739 ;CHECK: shl.8b v0, {{v[0-9]+}}, #1 1740 ;CHECK-NEXT: orr.8b 1741 ;CHECK-NEXT: ret 1742 %tmp1 = load <8 x i8>* %A 1743 %tmp4 = load <8 x i8>* %B 1744 %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 
1, i8 1, i8 1, i8 1, i8 1, i8 1> 1745 %tmp5 = or <8 x i8> %tmp3, %tmp4 1746 ret <8 x i8> %tmp5 1747 } 1748 1749 define <4 x i16> @shl_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1750 ;CHECK-LABEL: shl_orr4h: 1751 ;CHECK: shl.4h v0, {{v[0-9]+}}, #1 1752 ;CHECK-NEXT: orr.8b 1753 ;CHECK-NEXT: ret 1754 %tmp1 = load <4 x i16>* %A 1755 %tmp4 = load <4 x i16>* %B 1756 %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1> 1757 %tmp5 = or <4 x i16> %tmp3, %tmp4 1758 ret <4 x i16> %tmp5 1759 } 1760 1761 define <2 x i32> @shl_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1762 ;CHECK-LABEL: shl_orr2s: 1763 ;CHECK: shl.2s v0, {{v[0-9]+}}, #1 1764 ;CHECK-NEXT: orr.8b 1765 ;CHECK-NEXT: ret 1766 %tmp1 = load <2 x i32>* %A 1767 %tmp4 = load <2 x i32>* %B 1768 %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1> 1769 %tmp5 = or <2 x i32> %tmp3, %tmp4 1770 ret <2 x i32> %tmp5 1771 } 1772 1773 define <16 x i8> @shl_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1774 ;CHECK-LABEL: shl_orr16b: 1775 ;CHECK: shl.16b v0, {{v[0-9]+}}, #1 1776 ;CHECK-NEXT: orr.16b 1777 ;CHECK-NEXT: ret 1778 %tmp1 = load <16 x i8>* %A 1779 %tmp4 = load <16 x i8>* %B 1780 %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1781 %tmp5 = or <16 x i8> %tmp3, %tmp4 1782 ret <16 x i8> %tmp5 1783 } 1784 1785 define <8 x i16> @shl_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1786 ;CHECK-LABEL: shl_orr8h: 1787 ;CHECK: shl.8h v0, {{v[0-9]+}}, #1 1788 ;CHECK-NEXT: orr.16b 1789 ;CHECK-NEXT: ret 1790 %tmp1 = load <8 x i16>* %A 1791 %tmp4 = load <8 x i16>* %B 1792 %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1793 %tmp5 = or <8 x i16> %tmp3, %tmp4 1794 ret <8 x i16> %tmp5 1795 } 1796 1797 define <4 x i32> @shl_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1798 ;CHECK-LABEL: shl_orr4s: 1799 ;CHECK: shl.4s v0, {{v[0-9]+}}, #1 1800 ;CHECK-NEXT: orr.16b 1801 ;CHECK-NEXT: ret 1802 %tmp1 = load <4 x i32>* %A 1803 
%tmp4 = load <4 x i32>* %B 1804 %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> 1805 %tmp5 = or <4 x i32> %tmp3, %tmp4 1806 ret <4 x i32> %tmp5 1807 } 1808 1809 define <2 x i64> @shl_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1810 ;CHECK-LABEL: shl_orr2d: 1811 ;CHECK: shl.2d v0, {{v[0-9]+}}, #1 1812 ;CHECK-NEXT: orr.16b 1813 ;CHECK-NEXT: ret 1814 %tmp1 = load <2 x i64>* %A 1815 %tmp4 = load <2 x i64>* %B 1816 %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1> 1817 %tmp5 = or <2 x i64> %tmp3, %tmp4 1818 ret <2 x i64> %tmp5 1819 } 1820 1821 define <8 x i16> @shll(<8 x i8> %in) { 1822 ; CHECK-LABEL: shll: 1823 ; CHECK: shll.8h v0, {{v[0-9]+}}, #8 1824 %ext = zext <8 x i8> %in to <8 x i16> 1825 %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1826 ret <8 x i16> %res 1827 } 1828 1829 define <4 x i32> @shll_high(<8 x i16> %in) { 1830 ; CHECK-LABEL: shll_high 1831 ; CHECK: shll2.4s v0, {{v[0-9]+}}, #16 1832 %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1833 %ext = zext <4 x i16> %extract to <4 x i32> 1834 %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16> 1835 ret <4 x i32> %res 1836 } 1837 1838 define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1839 ;CHECK-LABEL: sli8b: 1840 ;CHECK: sli.8b v0, {{v[0-9]+}}, #1 1841 %tmp1 = load <8 x i8>* %A 1842 %tmp2 = load <8 x i8>* %B 1843 %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) 1844 ret <8 x i8> %tmp3 1845 } 1846 1847 define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1848 ;CHECK-LABEL: sli4h: 1849 ;CHECK: sli.4h v0, {{v[0-9]+}}, #1 1850 %tmp1 = load <4 x i16>* %A 1851 %tmp2 = load <4 x i16>* %B 1852 %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) 1853 ret <4 x i16> %tmp3 1854 } 1855 1856 define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1857 ;CHECK-LABEL: sli2s: 1858 ;CHECK: sli.2s v0, {{v[0-9]+}}, #1 
1859 %tmp1 = load <2 x i32>* %A 1860 %tmp2 = load <2 x i32>* %B 1861 %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) 1862 ret <2 x i32> %tmp3 1863 } 1864 1865 define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { 1866 ;CHECK-LABEL: sli1d: 1867 ;CHECK: sli d0, {{d[0-9]+}}, #1 1868 %tmp1 = load <1 x i64>* %A 1869 %tmp2 = load <1 x i64>* %B 1870 %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) 1871 ret <1 x i64> %tmp3 1872 } 1873 1874 define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1875 ;CHECK-LABEL: sli16b: 1876 ;CHECK: sli.16b v0, {{v[0-9]+}}, #1 1877 %tmp1 = load <16 x i8>* %A 1878 %tmp2 = load <16 x i8>* %B 1879 %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) 1880 ret <16 x i8> %tmp3 1881 } 1882 1883 define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1884 ;CHECK-LABEL: sli8h: 1885 ;CHECK: sli.8h v0, {{v[0-9]+}}, #1 1886 %tmp1 = load <8 x i16>* %A 1887 %tmp2 = load <8 x i16>* %B 1888 %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) 1889 ret <8 x i16> %tmp3 1890 } 1891 1892 define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1893 ;CHECK-LABEL: sli4s: 1894 ;CHECK: sli.4s v0, {{v[0-9]+}}, #1 1895 %tmp1 = load <4 x i32>* %A 1896 %tmp2 = load <4 x i32>* %B 1897 %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) 1898 ret <4 x i32> %tmp3 1899 } 1900 1901 define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1902 ;CHECK-LABEL: sli2d: 1903 ;CHECK: sli.2d v0, {{v[0-9]+}}, #1 1904 %tmp1 = load <2 x i64>* %A 1905 %tmp2 = load <2 x i64>* %B 1906 %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) 1907 ret <2 x i64> %tmp3 1908 } 1909 1910 declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone 1911 declare <4 x i16> 
@llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone 1912 declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone 1913 declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone 1914 1915 declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone 1916 declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone 1917 declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone 1918 declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone 1919 1920 define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) { 1921 ; CHECK-LABEL: ashr_v1i64: 1922 ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} 1923 ; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 1924 %c = ashr <1 x i64> %a, %b 1925 ret <1 x i64> %c 1926 } 1927