; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
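; Test that the ARM NEON vmin/vmax intrinsics (llvm.arm.neon.vmins, vminu,
; vmaxs, vmaxu) are selected to the corresponding vmin/vmax NEON instructions
; for both 64-bit (D-register) and 128-bit (Q-register) vector types.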

define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmins8:
;CHECK: vmin.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmins16:
;CHECK: vmin.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmins32:
;CHECK: vmin.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vminu8:
;CHECK: vmin.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vminu16:
;CHECK: vmin.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vminu32:
;CHECK: vmin.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vminf32:
;CHECK: vmin.f32
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
	ret <2 x float> %tmp3
}

define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vminQs8:
;CHECK: vmin.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vminQs16:
;CHECK: vmin.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vminQs32:
;CHECK: vmin.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vminQu8:
;CHECK: vmin.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vminQu16:
;CHECK: vmin.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vminQu32:
;CHECK: vmin.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vminQf32:
;CHECK: vmin.f32
	%tmp1 = load <4 x float>* %A
	%tmp2 = load <4 x float>* %B
	%tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
	ret <4 x float> %tmp3
}

declare <8 x i8>  @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone

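; The vmax tests below mirror the vmin tests above, using the vmaxs/vmaxu intrinsics.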
define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmaxs8:
;CHECK: vmax.s8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmaxs16:
;CHECK: vmax.s16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmaxs32:
;CHECK: vmax.s32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmaxu8:
;CHECK: vmax.u8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmaxu16:
;CHECK: vmax.u16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmaxu32:
;CHECK: vmax.u32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vmaxf32:
;CHECK: vmax.f32
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
	ret <2 x float> %tmp3
}

define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vmaxQs8:
;CHECK: vmax.s8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vmaxQs16:
;CHECK: vmax.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vmaxQs32:
;CHECK: vmax.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vmaxQu8:
;CHECK: vmax.u8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vmaxQu16:
;CHECK: vmax.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vmaxQu32:
;CHECK: vmax.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vmaxQf32:
;CHECK: vmax.f32
	%tmp1 = load <4 x float>* %A
	%tmp2 = load <4 x float>* %B
	%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
	ret <4 x float> %tmp3
}

declare <8 x i8>  @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone