; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

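; Vector shift left by a register operand selects the register form of VSHL.
; Only negative per-lane counts distinguish vshl.u* from vshl.s*, and the
; backend is expected to emit the unsigned variant for a plain shl.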
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vshls8:
;CHECK: vshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = shl <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vshls16:
;CHECK: vshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = shl <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vshls32:
;CHECK: vshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = shl <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vshls64:
;CHECK: vshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = shl <1 x i64> %tmp1, %tmp2
	ret <1 x i64> %tmp3
}

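; Shift left by a constant selects the immediate form, vshl.i*. The left-shift
; immediate encodes counts 0..size-1, so these tests use the largest legal
; amount for each element type.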
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
;CHECK: vshli8:
;CHECK: vshl.i8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <8 x i8> %tmp2
}

define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
;CHECK: vshli16:
;CHECK: vshl.i16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
	ret <4 x i16> %tmp2
}

define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
;CHECK: vshli32:
;CHECK: vshl.i32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
	ret <2 x i32> %tmp2
}

define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
;CHECK: vshli64:
;CHECK: vshl.i64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
	ret <1 x i64> %tmp2
}

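; The same lowerings apply to the 128-bit (quad register) vectors.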
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vshlQs8:
;CHECK: vshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = shl <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vshlQs16:
;CHECK: vshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = shl <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vshlQs32:
;CHECK: vshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = shl <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vshlQs64:
;CHECK: vshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = shl <2 x i64> %tmp1, %tmp2
	ret <2 x i64> %tmp3
}

define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
;CHECK: vshlQi8:
;CHECK: vshl.i8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
	ret <16 x i8> %tmp2
}

define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
;CHECK: vshlQi16:
;CHECK: vshl.i16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
	ret <8 x i16> %tmp2
}

define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
;CHECK: vshlQi32:
;CHECK: vshl.i32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
	ret <4 x i32> %tmp2
}

define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
;CHECK: vshlQi64:
;CHECK: vshl.i64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
	ret <2 x i64> %tmp2
}

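; NEON has no right-shift-by-register instruction; VSHL shifts right when a
; lane's count is negative. A variable lshr is therefore lowered to a negation
; of the shift amounts followed by vshl.u*. VNEG has no 64-bit form, so the
; i64 cases negate with a vsub.i64 from zero instead.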
define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vlshru8:
;CHECK: vneg.s8
;CHECK: vshl.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = lshr <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vlshru16:
;CHECK: vneg.s16
;CHECK: vshl.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = lshr <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vlshru32:
;CHECK: vneg.s32
;CHECK: vshl.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = lshr <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vlshru64:
;CHECK: vsub.i64
;CHECK: vshl.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = lshr <1 x i64> %tmp1, %tmp2
	ret <1 x i64> %tmp3
}

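; Logical shift right by a constant selects vshr.u*. The right-shift immediate
; encodes counts 1..size, so these tests shift by the full element width.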
define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
;CHECK: vlshri8:
;CHECK: vshr.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <8 x i8> %tmp2
}

define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
;CHECK: vlshri16:
;CHECK: vshr.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
	ret <4 x i16> %tmp2
}

define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
;CHECK: vlshri32:
;CHECK: vshr.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
	ret <2 x i32> %tmp2
}

define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
;CHECK: vlshri64:
;CHECK: vshr.u64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
	ret <1 x i64> %tmp2
}

define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vlshrQu8:
;CHECK: vneg.s8
;CHECK: vshl.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = lshr <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vlshrQu16:
;CHECK: vneg.s16
;CHECK: vshl.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = lshr <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vlshrQu32:
;CHECK: vneg.s32
;CHECK: vshl.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = lshr <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vlshrQu64:
;CHECK: vsub.i64
;CHECK: vshl.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = lshr <2 x i64> %tmp1, %tmp2
	ret <2 x i64> %tmp3
}

define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
;CHECK: vlshrQi8:
;CHECK: vshr.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <16 x i8> %tmp2
}

define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
;CHECK: vlshrQi16:
;CHECK: vshr.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
	ret <8 x i16> %tmp2
}

define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
;CHECK: vlshrQi32:
;CHECK: vshr.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
	ret <4 x i32> %tmp2
}

define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
;CHECK: vlshrQi64:
;CHECK: vshr.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
	ret <2 x i64> %tmp2
}

; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
entry:
	%shr = lshr <2 x i64> %val, < i64 2, i64 2 >		; <<2 x i64>> [#uses=1]
	ret <2 x i64> %shr
}

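; Arithmetic shift right mirrors the logical case but uses the signed forms:
; vneg/vsub plus vshl.s* for register counts, and vshr.s* for immediates.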
define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vashrs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = ashr <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vashrs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = ashr <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vashrs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = ashr <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vashrs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = ashr <1 x i64> %tmp1, %tmp2
	ret <1 x i64> %tmp3
}

define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
;CHECK: vashri8:
;CHECK: vshr.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <8 x i8> %tmp2
}

define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
;CHECK: vashri16:
;CHECK: vshr.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
	ret <4 x i16> %tmp2
}

define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
;CHECK: vashri32:
;CHECK: vshr.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
	ret <2 x i32> %tmp2
}

define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
;CHECK: vashri64:
;CHECK: vshr.s64
	%tmp1 = load <1 x i64>* %A
	%tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
	ret <1 x i64> %tmp2
}

define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vashrQs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = ashr <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vashrQs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = ashr <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vashrQs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = ashr <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vashrQs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = ashr <2 x i64> %tmp1, %tmp2
	ret <2 x i64> %tmp3
}

define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
;CHECK: vashrQi8:
;CHECK: vshr.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
	ret <16 x i8> %tmp2
}

define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
;CHECK: vashrQi16:
;CHECK: vshr.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
	ret <8 x i16> %tmp2
}

define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
;CHECK: vashrQi32:
;CHECK: vshr.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
	ret <4 x i32> %tmp2
}

define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
;CHECK: vashrQi64:
;CHECK: vshr.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
	ret <2 x i64> %tmp2
}