; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
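; Test folding of a vector shift right followed by an add into the NEON
; shift-right-and-accumulate instructions: ashr + add should select vsra.sN,
; lshr + add should select vsra.uN, and the rounding intrinsics below should
; select vrsra.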

define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsras8:
;CHECK: vsra.s8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  %tmp4 = add <8 x i8> %tmp1, %tmp3
  ret <8 x i8> %tmp4
}

define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsras16:
;CHECK: vsra.s16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
  %tmp4 = add <4 x i16> %tmp1, %tmp3
  ret <4 x i16> %tmp4
}

define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsras32:
;CHECK: vsra.s32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
  %tmp4 = add <2 x i32> %tmp1, %tmp3
  ret <2 x i32> %tmp4
}

define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsras64:
;CHECK: vsra.s64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = ashr <1 x i64> %tmp2, < i64 63 >
  %tmp4 = add <1 x i64> %tmp1, %tmp3
  ret <1 x i64> %tmp4
}

define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsraQs8:
;CHECK: vsra.s8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  %tmp4 = add <16 x i8> %tmp1, %tmp3
  ret <16 x i8> %tmp4
}

define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsraQs16:
;CHECK: vsra.s16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  %tmp4 = add <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsraQs32:
;CHECK: vsra.s32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
  %tmp4 = add <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsraQs64:
;CHECK: vsra.s64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
  %tmp4 = add <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}

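; Unsigned variants: an lshr feeding an add should select vsra.u8/u16/u32/u64
; on both D and Q registers.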
define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsrau8:
;CHECK: vsra.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  %tmp4 = add <8 x i8> %tmp1, %tmp3
  ret <8 x i8> %tmp4
}

define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsrau16:
;CHECK: vsra.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
  %tmp4 = add <4 x i16> %tmp1, %tmp3
  ret <4 x i16> %tmp4
}

define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsrau32:
;CHECK: vsra.u32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
  %tmp4 = add <2 x i32> %tmp1, %tmp3
  ret <2 x i32> %tmp4
}

define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsrau64:
;CHECK: vsra.u64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = lshr <1 x i64> %tmp2, < i64 63 >
  %tmp4 = add <1 x i64> %tmp1, %tmp3
  ret <1 x i64> %tmp4
}

define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsraQu8:
;CHECK: vsra.u8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  %tmp4 = add <16 x i8> %tmp1, %tmp3
  ret <16 x i8> %tmp4
}

define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsraQu16:
;CHECK: vsra.u16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  %tmp4 = add <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsraQu32:
;CHECK: vsra.u32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
  %tmp4 = add <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsraQu64:
;CHECK: vsra.u64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
  %tmp4 = add <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}

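; Rounding variants: the rounded right shift is expressed with the
; llvm.arm.neon.vrshifts/vrshiftu intrinsics (a negative constant shift amount
; encodes a right shift by that magnitude); the following add should fold into
; vrsra.sN/vrsra.uN.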
define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrsras8:
;CHECK: vrsra.s8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
  %tmp4 = add <8 x i8> %tmp1, %tmp3
  ret <8 x i8> %tmp4
}

define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrsras16:
;CHECK: vrsra.s16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
  %tmp4 = add <4 x i16> %tmp1, %tmp3
  ret <4 x i16> %tmp4
}

define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrsras32:
;CHECK: vrsra.s32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
  %tmp4 = add <2 x i32> %tmp1, %tmp3
  ret <2 x i32> %tmp4
}

define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vrsras64:
;CHECK: vrsra.s64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
  %tmp4 = add <1 x i64> %tmp1, %tmp3
  ret <1 x i64> %tmp4
}

define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrsrau8:
;CHECK: vrsra.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
  %tmp4 = add <8 x i8> %tmp1, %tmp3
  ret <8 x i8> %tmp4
}

define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrsrau16:
;CHECK: vrsra.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
  %tmp4 = add <4 x i16> %tmp1, %tmp3
  ret <4 x i16> %tmp4
}

define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrsrau32:
;CHECK: vrsra.u32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
  %tmp4 = add <2 x i32> %tmp1, %tmp3
  ret <2 x i32> %tmp4
}

define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vrsrau64:
;CHECK: vrsra.u64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
  %tmp4 = add <1 x i64> %tmp1, %tmp3
  ret <1 x i64> %tmp4
}

define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrsraQs8:
;CHECK: vrsra.s8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
  %tmp4 = add <16 x i8> %tmp1, %tmp3
  ret <16 x i8> %tmp4
}

define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrsraQs16:
;CHECK: vrsra.s16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
  %tmp4 = add <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrsraQs32:
;CHECK: vrsra.s32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
  %tmp4 = add <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsraQs64:
;CHECK: vrsra.s64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
  %tmp4 = add <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}

define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrsraQu8:
;CHECK: vrsra.u8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
  %tmp4 = add <16 x i8> %tmp1, %tmp3
  ret <16 x i8> %tmp4
}

define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrsraQu16:
;CHECK: vrsra.u16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
  %tmp4 = add <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrsraQu32:
;CHECK: vrsra.u32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
  %tmp4 = add <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsraQu64:
;CHECK: vrsra.u64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
  %tmp4 = add <2 x i64> %tmp1, %tmp3
  ret <2 x i64> %tmp4
}

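; Declarations for the NEON rounding-shift intrinsics used above.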
declare <8 x i8>  @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone