Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
        ; Loads two <8 x i8> vectors (64 bits each) through %A and %B and returns
        ; their element-wise integer difference. The FileCheck directives below
        ; expect "vsub.i8" to follow the "vsubi8:" label in llc's output.
      4 ;CHECK: vsubi8:
      5 ;CHECK: vsub.i8
      6 	%tmp1 = load <8 x i8>* %A
      7 	%tmp2 = load <8 x i8>* %B
      8 	%tmp3 = sub <8 x i8> %tmp1, %tmp2
      9 	ret <8 x i8> %tmp3
     10 }
     11 
     12 define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
        ; Loads two <4 x i16> vectors (64 bits each) through %A and %B and returns
        ; their element-wise integer difference. The FileCheck directives below
        ; expect "vsub.i16" to follow the "vsubi16:" label in llc's output.
     13 ;CHECK: vsubi16:
     14 ;CHECK: vsub.i16
     15 	%tmp1 = load <4 x i16>* %A
     16 	%tmp2 = load <4 x i16>* %B
     17 	%tmp3 = sub <4 x i16> %tmp1, %tmp2
     18 	ret <4 x i16> %tmp3
     19 }
     20 
     21 define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
        ; Loads two <2 x i32> vectors (64 bits each) through %A and %B and returns
        ; their element-wise integer difference. The FileCheck directives below
        ; expect "vsub.i32" to follow the "vsubi32:" label in llc's output.
     22 ;CHECK: vsubi32:
     23 ;CHECK: vsub.i32
     24 	%tmp1 = load <2 x i32>* %A
     25 	%tmp2 = load <2 x i32>* %B
     26 	%tmp3 = sub <2 x i32> %tmp1, %tmp2
     27 	ret <2 x i32> %tmp3
     28 }
     29 
     30 define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
        ; Single-element case: loads two <1 x i64> vectors through %A and %B and
        ; returns their integer difference. The FileCheck directives below expect
        ; "vsub.i64" to follow the "vsubi64:" label in llc's output.
     31 ;CHECK: vsubi64:
     32 ;CHECK: vsub.i64
     33 	%tmp1 = load <1 x i64>* %A
     34 	%tmp2 = load <1 x i64>* %B
     35 	%tmp3 = sub <1 x i64> %tmp1, %tmp2
     36 	ret <1 x i64> %tmp3
     37 }
     38 
     39 define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
        ; Floating-point variant: loads two <2 x float> vectors through %A and %B
        ; and subtracts them with fsub (not the integer sub used by the i8..i64
        ; tests). The FileCheck directives below expect "vsub.f32" to follow the
        ; "vsubf32:" label in llc's output.
     40 ;CHECK: vsubf32:
     41 ;CHECK: vsub.f32
     42 	%tmp1 = load <2 x float>* %A
     43 	%tmp2 = load <2 x float>* %B
     44 	%tmp3 = fsub <2 x float> %tmp1, %tmp2
     45 	ret <2 x float> %tmp3
     46 }
     47 
     48 define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
        ; 128-bit-wide version of the i8 subtract test: operands and result are
        ; <16 x i8>. The expected mnemonic is the same "vsub.i8" as for the
        ; <8 x i8> test; the directives only look for it after the "vsubQi8:" label.
     49 ;CHECK: vsubQi8:
     50 ;CHECK: vsub.i8
     51 	%tmp1 = load <16 x i8>* %A
     52 	%tmp2 = load <16 x i8>* %B
     53 	%tmp3 = sub <16 x i8> %tmp1, %tmp2
     54 	ret <16 x i8> %tmp3
     55 }
     56 
     57 define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
        ; 128-bit-wide version of the i16 subtract test: operands and result are
        ; <8 x i16>. The FileCheck directives below expect "vsub.i16" to follow
        ; the "vsubQi16:" label in llc's output.
     58 ;CHECK: vsubQi16:
     59 ;CHECK: vsub.i16
     60 	%tmp1 = load <8 x i16>* %A
     61 	%tmp2 = load <8 x i16>* %B
     62 	%tmp3 = sub <8 x i16> %tmp1, %tmp2
     63 	ret <8 x i16> %tmp3
     64 }
     65 
     66 define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
        ; 128-bit-wide version of the i32 subtract test: operands and result are
        ; <4 x i32>. The FileCheck directives below expect "vsub.i32" to follow
        ; the "vsubQi32:" label in llc's output.
     67 ;CHECK: vsubQi32:
     68 ;CHECK: vsub.i32
     69 	%tmp1 = load <4 x i32>* %A
     70 	%tmp2 = load <4 x i32>* %B
     71 	%tmp3 = sub <4 x i32> %tmp1, %tmp2
     72 	ret <4 x i32> %tmp3
     73 }
     74 
     75 define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
        ; 128-bit-wide version of the i64 subtract test: operands and result are
        ; <2 x i64>. The FileCheck directives below expect "vsub.i64" to follow
        ; the "vsubQi64:" label in llc's output.
     76 ;CHECK: vsubQi64:
     77 ;CHECK: vsub.i64
     78 	%tmp1 = load <2 x i64>* %A
     79 	%tmp2 = load <2 x i64>* %B
     80 	%tmp3 = sub <2 x i64> %tmp1, %tmp2
     81 	ret <2 x i64> %tmp3
     82 }
     83 
     84 define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
        ; 128-bit-wide floating-point subtract: loads two <4 x float> vectors and
        ; combines them with fsub. The FileCheck directives below expect
        ; "vsub.f32" to follow the "vsubQf32:" label in llc's output.
     85 ;CHECK: vsubQf32:
     86 ;CHECK: vsub.f32
     87 	%tmp1 = load <4 x float>* %A
     88 	%tmp2 = load <4 x float>* %B
     89 	%tmp3 = fsub <4 x float> %tmp1, %tmp2
     90 	ret <4 x float> %tmp3
     91 }
     92 
     93 define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
        ; Exercises the llvm.arm.neon.vsubhn.v8i8 intrinsic: two <8 x i16> inputs
        ; produce an <8 x i8> result, i.e. each result element is half as wide as
        ; the input elements. The expected mnemonic suffix names the input element
        ; type: "vsubhn.i16" after the "vsubhni16:" label.
     94 ;CHECK: vsubhni16:
     95 ;CHECK: vsubhn.i16
     96 	%tmp1 = load <8 x i16>* %A
     97 	%tmp2 = load <8 x i16>* %B
     98 	%tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
     99 	ret <8 x i8> %tmp3
    100 }
    101 
    102 define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
        ; Exercises the llvm.arm.neon.vsubhn.v4i16 intrinsic: two <4 x i32> inputs
        ; produce a <4 x i16> result (elements half as wide as the inputs).
        ; Expected in llc's output: "vsubhn.i32" after the "vsubhni32:" label.
    103 ;CHECK: vsubhni32:
    104 ;CHECK: vsubhn.i32
    105 	%tmp1 = load <4 x i32>* %A
    106 	%tmp2 = load <4 x i32>* %B
    107 	%tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
    108 	ret <4 x i16> %tmp3
    109 }
    110 
    111 define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
        ; Exercises the llvm.arm.neon.vsubhn.v2i32 intrinsic: two <2 x i64> inputs
        ; produce a <2 x i32> result (elements half as wide as the inputs).
        ; Expected in llc's output: "vsubhn.i64" after the "vsubhni64:" label.
    112 ;CHECK: vsubhni64:
    113 ;CHECK: vsubhn.i64
    114 	%tmp1 = load <2 x i64>* %A
    115 	%tmp2 = load <2 x i64>* %B
    116 	%tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
    117 	ret <2 x i32> %tmp3
    118 }
    119 
    120 declare <8 x i8>  @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
    121 declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
    122 declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
    123 
    124 define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
        ; Exercises the llvm.arm.neon.vrsubhn.v8i8 intrinsic: two <8 x i16> inputs
        ; produce an <8 x i8> result (elements half as wide as the inputs).
        ; Expected in llc's output: "vrsubhn.i16" after the "vrsubhni16:" label.
        ; NOTE(review): the "r" prefix presumably denotes the rounding form of
        ; vsubhn -- confirm against the ARM NEON reference.
    125 ;CHECK: vrsubhni16:
    126 ;CHECK: vrsubhn.i16
    127 	%tmp1 = load <8 x i16>* %A
    128 	%tmp2 = load <8 x i16>* %B
    129 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
    130 	ret <8 x i8> %tmp3
    131 }
    132 
    133 define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
        ; Exercises the llvm.arm.neon.vrsubhn.v4i16 intrinsic: two <4 x i32> inputs
        ; produce a <4 x i16> result (elements half as wide as the inputs).
        ; Expected in llc's output: "vrsubhn.i32" after the "vrsubhni32:" label.
        ; NOTE(review): the "r" prefix presumably denotes the rounding form of
        ; vsubhn -- confirm against the ARM NEON reference.
    134 ;CHECK: vrsubhni32:
    135 ;CHECK: vrsubhn.i32
    136 	%tmp1 = load <4 x i32>* %A
    137 	%tmp2 = load <4 x i32>* %B
    138 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
    139 	ret <4 x i16> %tmp3
    140 }
    141 
    142 define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
        ; Exercises the llvm.arm.neon.vrsubhn.v2i32 intrinsic: two <2 x i64> inputs
        ; produce a <2 x i32> result (elements half as wide as the inputs).
        ; Expected in llc's output: "vrsubhn.i64" after the "vrsubhni64:" label.
        ; NOTE(review): the "r" prefix presumably denotes the rounding form of
        ; vsubhn -- confirm against the ARM NEON reference.
    143 ;CHECK: vrsubhni64:
    144 ;CHECK: vrsubhn.i64
    145 	%tmp1 = load <2 x i64>* %A
    146 	%tmp2 = load <2 x i64>* %B
    147 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
    148 	ret <2 x i32> %tmp3
    149 }
    150 
    151 declare <8 x i8>  @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
    152 declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
    153 declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
    154 
    155 define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
        ; Widening signed subtract written in plain IR (no intrinsic): both
        ; <8 x i8> operands are sign-extended to <8 x i16> and then subtracted.
        ; The directives expect this sext/sext/sub pattern to yield "vsubl.s8"
        ; after the "vsubls8:" label in llc's output.
    156 ;CHECK: vsubls8:
    157 ;CHECK: vsubl.s8
    158 	%tmp1 = load <8 x i8>* %A
    159 	%tmp2 = load <8 x i8>* %B
    160 	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
    161 	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
    162 	%tmp5 = sub <8 x i16> %tmp3, %tmp4
    163 	ret <8 x i16> %tmp5
    164 }
    165 
    166 define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
        ; Widening signed subtract: both <4 x i16> operands are sign-extended to
        ; <4 x i32> and then subtracted. The directives expect "vsubl.s16" after
        ; the "vsubls16:" label in llc's output.
    167 ;CHECK: vsubls16:
    168 ;CHECK: vsubl.s16
    169 	%tmp1 = load <4 x i16>* %A
    170 	%tmp2 = load <4 x i16>* %B
    171 	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
    172 	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
    173 	%tmp5 = sub <4 x i32> %tmp3, %tmp4
    174 	ret <4 x i32> %tmp5
    175 }
    176 
    177 define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
        ; Widening signed subtract: both <2 x i32> operands are sign-extended to
        ; <2 x i64> and then subtracted. The directives expect "vsubl.s32" after
        ; the "vsubls32:" label in llc's output.
    178 ;CHECK: vsubls32:
    179 ;CHECK: vsubl.s32
    180 	%tmp1 = load <2 x i32>* %A
    181 	%tmp2 = load <2 x i32>* %B
    182 	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
    183 	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
    184 	%tmp5 = sub <2 x i64> %tmp3, %tmp4
    185 	ret <2 x i64> %tmp5
    186 }
    187 
    188 define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
        ; Widening unsigned subtract: both <8 x i8> operands are zero-extended
        ; (zext, where the signed tests use sext) to <8 x i16> and then
        ; subtracted. The directives expect "vsubl.u8" after the "vsublu8:" label
        ; in llc's output.
    189 ;CHECK: vsublu8:
    190 ;CHECK: vsubl.u8
    191 	%tmp1 = load <8 x i8>* %A
    192 	%tmp2 = load <8 x i8>* %B
    193 	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
    194 	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
    195 	%tmp5 = sub <8 x i16> %tmp3, %tmp4
    196 	ret <8 x i16> %tmp5
    197 }
    198 
    199 define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
        ; Widening unsigned subtract: both <4 x i16> operands are zero-extended to
        ; <4 x i32> and then subtracted. The directives expect "vsubl.u16" after
        ; the "vsublu16:" label in llc's output.
    200 ;CHECK: vsublu16:
    201 ;CHECK: vsubl.u16
    202 	%tmp1 = load <4 x i16>* %A
    203 	%tmp2 = load <4 x i16>* %B
    204 	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
    205 	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
    206 	%tmp5 = sub <4 x i32> %tmp3, %tmp4
    207 	ret <4 x i32> %tmp5
    208 }
    209 
    210 define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
        ; Widening unsigned subtract: both <2 x i32> operands are zero-extended to
        ; <2 x i64> and then subtracted. The directives expect "vsubl.u32" after
        ; the "vsublu32:" label in llc's output.
    211 ;CHECK: vsublu32:
    212 ;CHECK: vsubl.u32
    213 	%tmp1 = load <2 x i32>* %A
    214 	%tmp2 = load <2 x i32>* %B
    215 	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
    216 	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
    217 	%tmp5 = sub <2 x i64> %tmp3, %tmp4
    218 	ret <2 x i64> %tmp5
    219 }
    220 
    221 define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
        ; Mixed-width signed subtract: the first operand is already <8 x i16>;
        ; only the second (<8 x i8>) operand is sign-extended before the subtract.
        ; The directives expect "vsubw.s8" after the "vsubws8:" label in llc's
        ; output.
    222 ;CHECK: vsubws8:
    223 ;CHECK: vsubw.s8
    224 	%tmp1 = load <8 x i16>* %A
    225 	%tmp2 = load <8 x i8>* %B
    226 	%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
    227 	%tmp4 = sub <8 x i16> %tmp1, %tmp3
    228 	ret <8 x i16> %tmp4
    229 }
    230 
    231 define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
        ; Mixed-width signed subtract: the first operand is already <4 x i32>;
        ; only the second (<4 x i16>) operand is sign-extended before the subtract.
        ; The directives expect "vsubw.s16" after the "vsubws16:" label in llc's
        ; output.
    232 ;CHECK: vsubws16:
    233 ;CHECK: vsubw.s16
    234 	%tmp1 = load <4 x i32>* %A
    235 	%tmp2 = load <4 x i16>* %B
    236 	%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
    237 	%tmp4 = sub <4 x i32> %tmp1, %tmp3
    238 	ret <4 x i32> %tmp4
    239 }
    240 
    241 define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
        ; Mixed-width signed subtract: the first operand is already <2 x i64>;
        ; only the second (<2 x i32>) operand is sign-extended before the subtract.
        ; The directives expect "vsubw.s32" after the "vsubws32:" label in llc's
        ; output.
    242 ;CHECK: vsubws32:
    243 ;CHECK: vsubw.s32
    244 	%tmp1 = load <2 x i64>* %A
    245 	%tmp2 = load <2 x i32>* %B
    246 	%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
    247 	%tmp4 = sub <2 x i64> %tmp1, %tmp3
    248 	ret <2 x i64> %tmp4
    249 }
    250 
    251 define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
        ; Mixed-width unsigned subtract: the first operand is already <8 x i16>;
        ; only the second (<8 x i8>) operand is zero-extended before the subtract.
        ; The directives expect "vsubw.u8" after the "vsubwu8:" label in llc's
        ; output.
    252 ;CHECK: vsubwu8:
    253 ;CHECK: vsubw.u8
    254 	%tmp1 = load <8 x i16>* %A
    255 	%tmp2 = load <8 x i8>* %B
    256 	%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
    257 	%tmp4 = sub <8 x i16> %tmp1, %tmp3
    258 	ret <8 x i16> %tmp4
    259 }
    260 
    261 define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
        ; Mixed-width unsigned subtract: the first operand is already <4 x i32>;
        ; only the second (<4 x i16>) operand is zero-extended before the subtract.
        ; The directives expect "vsubw.u16" after the "vsubwu16:" label in llc's
        ; output.
    262 ;CHECK: vsubwu16:
    263 ;CHECK: vsubw.u16
    264 	%tmp1 = load <4 x i32>* %A
    265 	%tmp2 = load <4 x i16>* %B
    266 	%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
    267 	%tmp4 = sub <4 x i32> %tmp1, %tmp3
    268 	ret <4 x i32> %tmp4
    269 }
    270 
    271 define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
        ; Mixed-width unsigned subtract: the first operand is already <2 x i64>;
        ; only the second (<2 x i32>) operand is zero-extended before the subtract.
        ; The directives expect "vsubw.u32" after the "vsubwu32:" label in llc's
        ; output.
    272 ;CHECK: vsubwu32:
    273 ;CHECK: vsubw.u32
    274 	%tmp1 = load <2 x i64>* %A
    275 	%tmp2 = load <2 x i32>* %B
    276 	%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
    277 	%tmp4 = sub <2 x i64> %tmp1, %tmp3
    278 	ret <2 x i64> %tmp4
    279 }
    280