; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
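; Check that vector shifts (shl, lshr, ashr) by a register operand and by a
; constant are selected to NEON shift instructions, for both 64-bit and
; 128-bit (Q) vectors with i8, i16, i32, and i64 elements.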

define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vshls8:
;CHECK: vshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = shl <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vshls16:
;CHECK: vshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = shl <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vshls32:
;CHECK: vshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = shl <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vshls64:
;CHECK: vshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = shl <1 x i64> %tmp1, %tmp2
	ret <1 x i64> %tmp3
}

define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
;CHECK: vshli8:
;CHECK: vshl.i8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <8 x i8> %tmp2
}

define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
;CHECK: vshli16:
;CHECK: vshl.i16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
	ret <4 x i16> %tmp2
}

define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
;CHECK: vshli32:
;CHECK: vshl.i32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
	ret <2 x i32> %tmp2
}

define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
;CHECK: vshli64:
;CHECK: vshl.i64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
	ret <1 x i64> %tmp2
}

define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vshlQs8:
;CHECK: vshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = shl <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vshlQs16:
;CHECK: vshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = shl <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vshlQs32:
;CHECK: vshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = shl <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vshlQs64:
;CHECK: vshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = shl <2 x i64> %tmp1, %tmp2
	ret <2 x i64> %tmp3
}

define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
;CHECK: vshlQi8:
;CHECK: vshl.i8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <16 x i8> %tmp2
}

define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
;CHECK: vshlQi16:
;CHECK: vshl.i16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
	ret <8 x i16> %tmp2
}

define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
;CHECK: vshlQi32:
;CHECK: vshl.i32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
	ret <4 x i32> %tmp2
}

define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
;CHECK: vshlQi64:
;CHECK: vshl.i64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
	ret <2 x i64> %tmp2
}
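
; NEON has no vector shift-right by a register operand, so a variable lshr is
; expected to be lowered to a negation of the shift amount (vneg, or vsub.i64
; from zero for 64-bit elements) followed by an unsigned vshl.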

define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vlshru8:
;CHECK: vneg.s8
;CHECK: vshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = lshr <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vlshru16:
;CHECK: vneg.s16
;CHECK: vshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = lshr <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vlshru32:
;CHECK: vneg.s32
;CHECK: vshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = lshr <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vlshru64:
;CHECK: vsub.i64
;CHECK: vshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = lshr <1 x i64> %tmp1, %tmp2
	ret <1 x i64> %tmp3
}

define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
;CHECK: vlshri8:
;CHECK: vshr.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <8 x i8> %tmp2
}

define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
;CHECK: vlshri16:
;CHECK: vshr.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
	ret <4 x i16> %tmp2
}

define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
;CHECK: vlshri32:
;CHECK: vshr.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
	ret <2 x i32> %tmp2
}

define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
;CHECK: vlshri64:
;CHECK: vshr.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
	ret <1 x i64> %tmp2
}

define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vlshrQu8:
;CHECK: vneg.s8
;CHECK: vshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = lshr <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vlshrQu16:
;CHECK: vneg.s16
;CHECK: vshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = lshr <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vlshrQu32:
;CHECK: vneg.s32
;CHECK: vshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = lshr <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vlshrQu64:
;CHECK: vsub.i64
;CHECK: vshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = lshr <2 x i64> %tmp1, %tmp2
	ret <2 x i64> %tmp3
}

define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
;CHECK: vlshrQi8:
;CHECK: vshr.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <16 x i8> %tmp2
}

define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
;CHECK: vlshrQi16:
;CHECK: vshr.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
	ret <8 x i16> %tmp2
}

define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
;CHECK: vlshrQi32:
;CHECK: vshr.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
	ret <4 x i32> %tmp2
}

define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
;CHECK: vlshrQi64:
;CHECK: vshr.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
	ret <2 x i64> %tmp2
}

; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
entry:
	%shr = lshr <2 x i64> %val, < i64 2, i64 2 >		; <<2 x i64>> [#uses=1]
	ret <2 x i64> %shr
}
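
; Arithmetic right shifts follow the same pattern: variable ashr is expected
; to use the signed form, vshl.s*, after negating the shift amount, while
; ashr by a constant maps directly to vshr.s*.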

define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vashrs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = ashr <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vashrs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = ashr <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vashrs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = ashr <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vashrs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = ashr <1 x i64> %tmp1, %tmp2
	ret <1 x i64> %tmp3
}

define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
;CHECK: vashri8:
;CHECK: vshr.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <8 x i8> %tmp2
}

define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
;CHECK: vashri16:
;CHECK: vshr.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
	ret <4 x i16> %tmp2
}

define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
;CHECK: vashri32:
;CHECK: vshr.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
	ret <2 x i32> %tmp2
}

define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
;CHECK: vashri64:
;CHECK: vshr.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
	ret <1 x i64> %tmp2
}

define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vashrQs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = ashr <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vashrQs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = ashr <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vashrQs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = ashr <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vashrQs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = ashr <2 x i64> %tmp1, %tmp2
	ret <2 x i64> %tmp3
}

define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
;CHECK: vashrQi8:
;CHECK: vshr.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <16 x i8> %tmp2
}

define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
;CHECK: vashrQi16:
;CHECK: vshr.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
	ret <8 x i16> %tmp2
}

define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
;CHECK: vashrQi32:
;CHECK: vshr.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
	ret <4 x i32> %tmp2
}

define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
;CHECK: vashrQi64:
;CHECK: vshr.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
	ret <2 x i64> %tmp2
}