; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

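; 64-bit (D register) vector subtracts: plain sub/fsub should select vsub.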
define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubi8:
;CHECK: vsub.i8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = sub <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubi16:
;CHECK: vsub.i16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = sub <4 x i16> %tmp1, %tmp2
	ret <4 x i16> %tmp3
}

define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubi32:
;CHECK: vsub.i32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = sub <2 x i32> %tmp1, %tmp2
	ret <2 x i32> %tmp3
}

define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsubi64:
;CHECK: vsub.i64
	%tmp1 = load <1 x i64>, <1 x i64>* %A
	%tmp2 = load <1 x i64>, <1 x i64>* %B
	%tmp3 = sub <1 x i64> %tmp1, %tmp2
	ret <1 x i64> %tmp3
}

define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vsubf32:
;CHECK: vsub.f32
	%tmp1 = load <2 x float>, <2 x float>* %A
	%tmp2 = load <2 x float>, <2 x float>* %B
	%tmp3 = fsub <2 x float> %tmp1, %tmp2
	ret <2 x float> %tmp3
}

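; 128-bit (Q register) versions of the same patterns.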
define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsubQi8:
;CHECK: vsub.i8
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = sub <16 x i8> %tmp1, %tmp2
	ret <16 x i8> %tmp3
}

define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsubQi16:
;CHECK: vsub.i16
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = sub <8 x i16> %tmp1, %tmp2
	ret <8 x i16> %tmp3
}

define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsubQi32:
;CHECK: vsub.i32
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = sub <4 x i32> %tmp1, %tmp2
	ret <4 x i32> %tmp3
}

define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsubQi64:
;CHECK: vsub.i64
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i64>, <2 x i64>* %B
	%tmp3 = sub <2 x i64> %tmp1, %tmp2
	ret <2 x i64> %tmp3
}

define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vsubQf32:
;CHECK: vsub.f32
	%tmp1 = load <4 x float>, <4 x float>* %A
	%tmp2 = load <4 x float>, <4 x float>* %B
	%tmp3 = fsub <4 x float> %tmp1, %tmp2
	ret <4 x float> %tmp3
}

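; VSUBHN (subtract and narrow, returning the high half of each element)
; should be selected from the natural IR pattern: sub, lshr by half the
; element width, then trunc.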
define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vsubhni16_natural:
; CHECK: vsubhn.i16
  %diff = sub <8 x i16> %A, %B
  %shift = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %trunc = trunc <8 x i16> %shift to <8 x i8>
  ret <8 x i8> %trunc
}

define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: vsubhni32_natural:
; CHECK: vsubhn.i32
  %diff = sub <4 x i32> %A, %B
  %shift = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
  %trunc = trunc <4 x i32> %shift to <4 x i16>
  ret <4 x i16> %trunc
}

define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: vsubhni64_natural:
; CHECK: vsubhn.i64
  %diff = sub <2 x i64> %A, %B
  %shift = lshr <2 x i64> %diff, <i64 32, i64 32>
  %trunc = trunc <2 x i64> %shift to <2 x i32>
  ret <2 x i32> %trunc
}

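; VRSUBHN is the rounding variant of VSUBHN; it has no simple IR idiom, so it
; is reached through the llvm.arm.neon.vrsubhn intrinsics declared below.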
define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrsubhni16:
;CHECK: vrsubhn.i16
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrsubhni32:
;CHECK: vrsubhn.i32
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsubhni64:
;CHECK: vrsubhn.i64
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i64>, <2 x i64>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i32> %tmp3
}

declare <8 x i8>  @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

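; VSUBL (subtract and lengthen): both operands are sign- or zero-extended to
; the double-width type before the subtraction.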
define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubls8:
;CHECK: vsubl.s8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
	%tmp5 = sub <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}

define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubls16:
;CHECK: vsubl.s16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
	%tmp5 = sub <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}

define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubls32:
;CHECK: vsubl.s32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
	%tmp5 = sub <2 x i64> %tmp3, %tmp4
	ret <2 x i64> %tmp5
}

define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsublu8:
;CHECK: vsubl.u8
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
	%tmp5 = sub <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}

define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsublu16:
;CHECK: vsubl.u16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
	%tmp5 = sub <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}

define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsublu32:
;CHECK: vsubl.u32
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
	%tmp5 = sub <2 x i64> %tmp3, %tmp4
	ret <2 x i64> %tmp5
}

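; VSUBW (wide subtract): only the second operand is extended; it is
; subtracted from an operand that is already double-width.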
define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubws8:
;CHECK: vsubw.s8
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
	%tmp4 = sub <8 x i16> %tmp1, %tmp3
	ret <8 x i16> %tmp4
}

define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubws16:
;CHECK: vsubw.s16
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
	%tmp4 = sub <4 x i32> %tmp1, %tmp3
	ret <4 x i32> %tmp4
}

define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubws32:
;CHECK: vsubw.s32
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
	%tmp4 = sub <2 x i64> %tmp1, %tmp3
	ret <2 x i64> %tmp4
}

define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubwu8:
;CHECK: vsubw.u8
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
	%tmp4 = sub <8 x i16> %tmp1, %tmp3
	ret <8 x i16> %tmp4
}

define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubwu16:
;CHECK: vsubw.u16
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
	%tmp4 = sub <4 x i32> %tmp1, %tmp3
	ret <4 x i32> %tmp4
}

define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubwu32:
;CHECK: vsubw.u32
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
	%tmp4 = sub <2 x i64> %tmp1, %tmp3
	ret <2 x i64> %tmp4
}