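; AArch64 NEON min/max instruction-selection tests: element-wise and pairwise integer smax/umax/smin/umin, plus floating-point fmax/fmin, fmaxp/fminp and fmaxnmp/fminnmp.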
      1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
      2 
      3 define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
      4 ;CHECK-LABEL: smax_8b:
      5 ;CHECK: smax.8b
      6 	%tmp1 = load <8 x i8>, <8 x i8>* %A
      7 	%tmp2 = load <8 x i8>, <8 x i8>* %B
      8 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
      9 	ret <8 x i8> %tmp3
     10 }
     11 
     12 define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     13 ;CHECK-LABEL: smax_16b:
     14 ;CHECK: smax.16b
     15 	%tmp1 = load <16 x i8>, <16 x i8>* %A
     16 	%tmp2 = load <16 x i8>, <16 x i8>* %B
     17 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
     18 	ret <16 x i8> %tmp3
     19 }
     20 
     21 define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     22 ;CHECK-LABEL: smax_4h:
     23 ;CHECK: smax.4h
     24 	%tmp1 = load <4 x i16>, <4 x i16>* %A
     25 	%tmp2 = load <4 x i16>, <4 x i16>* %B
     26 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
     27 	ret <4 x i16> %tmp3
     28 }
     29 
     30 define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
     31 ;CHECK-LABEL: smax_8h:
     32 ;CHECK: smax.8h
     33 	%tmp1 = load <8 x i16>, <8 x i16>* %A
     34 	%tmp2 = load <8 x i16>, <8 x i16>* %B
     35 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
     36 	ret <8 x i16> %tmp3
     37 }
     38 
     39 define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     40 ;CHECK-LABEL: smax_2s:
     41 ;CHECK: smax.2s
     42 	%tmp1 = load <2 x i32>, <2 x i32>* %A
     43 	%tmp2 = load <2 x i32>, <2 x i32>* %B
     44 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
     45 	ret <2 x i32> %tmp3
     46 }
     47 
     48 define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
     49 ;CHECK-LABEL: smax_4s:
     50 ;CHECK: smax.4s
     51 	%tmp1 = load <4 x i32>, <4 x i32>* %A
     52 	%tmp2 = load <4 x i32>, <4 x i32>* %B
     53 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
     54 	ret <4 x i32> %tmp3
     55 }
     56 
     57 declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
     58 declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
     59 declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
     60 declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
     61 declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
     62 declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
     63 
     64 define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
     65 ;CHECK-LABEL: umax_8b:
     66 ;CHECK: umax.8b
     67 	%tmp1 = load <8 x i8>, <8 x i8>* %A
     68 	%tmp2 = load <8 x i8>, <8 x i8>* %B
     69 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
     70 	ret <8 x i8> %tmp3
     71 }
     72 
     73 define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     74 ;CHECK-LABEL: umax_16b:
     75 ;CHECK: umax.16b
     76 	%tmp1 = load <16 x i8>, <16 x i8>* %A
     77 	%tmp2 = load <16 x i8>, <16 x i8>* %B
     78 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
     79 	ret <16 x i8> %tmp3
     80 }
     81 
     82 define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     83 ;CHECK-LABEL: umax_4h:
     84 ;CHECK: umax.4h
     85 	%tmp1 = load <4 x i16>, <4 x i16>* %A
     86 	%tmp2 = load <4 x i16>, <4 x i16>* %B
     87 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
     88 	ret <4 x i16> %tmp3
     89 }
     90 
     91 define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
     92 ;CHECK-LABEL: umax_8h:
     93 ;CHECK: umax.8h
     94 	%tmp1 = load <8 x i16>, <8 x i16>* %A
     95 	%tmp2 = load <8 x i16>, <8 x i16>* %B
     96 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
     97 	ret <8 x i16> %tmp3
     98 }
     99 
    100 define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    101 ;CHECK-LABEL: umax_2s:
    102 ;CHECK: umax.2s
    103 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    104 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    105 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    106 	ret <2 x i32> %tmp3
    107 }
    108 
    109 define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    110 ;CHECK-LABEL: umax_4s:
    111 ;CHECK: umax.4s
    112 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    113 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    114 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    115 	ret <4 x i32> %tmp3
    116 }
    117 
    118 declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    119 declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    120 declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    121 declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    122 declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    123 declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    124 
    125 define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    126 ;CHECK-LABEL: smin_8b:
    127 ;CHECK: smin.8b
    128 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    129 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    130 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    131 	ret <8 x i8> %tmp3
    132 }
    133 
    134 define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    135 ;CHECK-LABEL: smin_16b:
    136 ;CHECK: smin.16b
    137 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    138 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    139 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    140 	ret <16 x i8> %tmp3
    141 }
    142 
    143 define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    144 ;CHECK-LABEL: smin_4h:
    145 ;CHECK: smin.4h
    146 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    147 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    148 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    149 	ret <4 x i16> %tmp3
    150 }
    151 
    152 define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    153 ;CHECK-LABEL: smin_8h:
    154 ;CHECK: smin.8h
    155 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    156 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    157 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    158 	ret <8 x i16> %tmp3
    159 }
    160 
    161 define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    162 ;CHECK-LABEL: smin_2s:
    163 ;CHECK: smin.2s
    164 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    165 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    166 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    167 	ret <2 x i32> %tmp3
    168 }
    169 
    170 define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    171 ;CHECK-LABEL: smin_4s:
    172 ;CHECK: smin.4s
    173 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    174 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    175 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    176 	ret <4 x i32> %tmp3
    177 }
    178 
    179 declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    180 declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    181 declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    182 declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    183 declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    184 declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    185 
    186 define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    187 ;CHECK-LABEL: umin_8b:
    188 ;CHECK: umin.8b
    189 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    190 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    191 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    192 	ret <8 x i8> %tmp3
    193 }
    194 
    195 define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    196 ;CHECK-LABEL: umin_16b:
    197 ;CHECK: umin.16b
    198 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    199 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    200 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    201 	ret <16 x i8> %tmp3
    202 }
    203 
    204 define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    205 ;CHECK-LABEL: umin_4h:
    206 ;CHECK: umin.4h
    207 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    208 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    209 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    210 	ret <4 x i16> %tmp3
    211 }
    212 
    213 define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    214 ;CHECK-LABEL: umin_8h:
    215 ;CHECK: umin.8h
    216 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    217 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    218 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    219 	ret <8 x i16> %tmp3
    220 }
    221 
    222 define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    223 ;CHECK-LABEL: umin_2s:
    224 ;CHECK: umin.2s
    225 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    226 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    227 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    228 	ret <2 x i32> %tmp3
    229 }
    230 
    231 define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    232 ;CHECK-LABEL: umin_4s:
    233 ;CHECK: umin.4s
    234 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    235 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    236 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    237 	ret <4 x i32> %tmp3
    238 }
    239 
    240 declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    241 declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    242 declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    243 declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    244 declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    245 declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    246 
; Pairwise integer max tests (smaxp/umaxp) follow. The identical lit invocation at the top of this file already covers them, so the duplicate is dropped.
    248 
    249 define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    250 ;CHECK-LABEL: smaxp_8b:
    251 ;CHECK: smaxp.8b
    252 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    253 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    254 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    255 	ret <8 x i8> %tmp3
    256 }
    257 
    258 define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    259 ;CHECK-LABEL: smaxp_16b:
    260 ;CHECK: smaxp.16b
    261 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    262 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    263 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    264 	ret <16 x i8> %tmp3
    265 }
    266 
    267 define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    268 ;CHECK-LABEL: smaxp_4h:
    269 ;CHECK: smaxp.4h
    270 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    271 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    272 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    273 	ret <4 x i16> %tmp3
    274 }
    275 
    276 define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    277 ;CHECK-LABEL: smaxp_8h:
    278 ;CHECK: smaxp.8h
    279 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    280 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    281 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    282 	ret <8 x i16> %tmp3
    283 }
    284 
    285 define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    286 ;CHECK-LABEL: smaxp_2s:
    287 ;CHECK: smaxp.2s
    288 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    289 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    290 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    291 	ret <2 x i32> %tmp3
    292 }
    293 
    294 define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    295 ;CHECK-LABEL: smaxp_4s:
    296 ;CHECK: smaxp.4s
    297 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    298 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    299 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    300 	ret <4 x i32> %tmp3
    301 }
    302 
    303 declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    304 declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    305 declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    306 declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    307 declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    308 declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    309 
    310 define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    311 ;CHECK-LABEL: umaxp_8b:
    312 ;CHECK: umaxp.8b
    313 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    314 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    315 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    316 	ret <8 x i8> %tmp3
    317 }
    318 
    319 define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    320 ;CHECK-LABEL: umaxp_16b:
    321 ;CHECK: umaxp.16b
    322 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    323 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    324 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    325 	ret <16 x i8> %tmp3
    326 }
    327 
    328 define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    329 ;CHECK-LABEL: umaxp_4h:
    330 ;CHECK: umaxp.4h
    331 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    332 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    333 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    334 	ret <4 x i16> %tmp3
    335 }
    336 
    337 define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    338 ;CHECK-LABEL: umaxp_8h:
    339 ;CHECK: umaxp.8h
    340 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    341 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    342 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    343 	ret <8 x i16> %tmp3
    344 }
    345 
    346 define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    347 ;CHECK-LABEL: umaxp_2s:
    348 ;CHECK: umaxp.2s
    349 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    350 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    351 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    352 	ret <2 x i32> %tmp3
    353 }
    354 
    355 define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    356 ;CHECK-LABEL: umaxp_4s:
    357 ;CHECK: umaxp.4s
    358 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    359 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    360 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    361 	ret <4 x i32> %tmp3
    362 }
    363 
    364 declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    365 declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    366 declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    367 declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    368 declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    369 declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    370 
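; Pairwise integer min tests (sminp/uminp) and the floating-point min/max tests follow. The identical lit invocation at the top of this file already covers them, so the duplicate is dropped.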
    372 
    373 define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    374 ;CHECK-LABEL: sminp_8b:
    375 ;CHECK: sminp.8b
    376 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    377 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    378 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    379 	ret <8 x i8> %tmp3
    380 }
    381 
    382 define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    383 ;CHECK-LABEL: sminp_16b:
    384 ;CHECK: sminp.16b
    385 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    386 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    387 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    388 	ret <16 x i8> %tmp3
    389 }
    390 
    391 define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    392 ;CHECK-LABEL: sminp_4h:
    393 ;CHECK: sminp.4h
    394 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    395 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    396 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    397 	ret <4 x i16> %tmp3
    398 }
    399 
    400 define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    401 ;CHECK-LABEL: sminp_8h:
    402 ;CHECK: sminp.8h
    403 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    404 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    405 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    406 	ret <8 x i16> %tmp3
    407 }
    408 
    409 define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    410 ;CHECK-LABEL: sminp_2s:
    411 ;CHECK: sminp.2s
    412 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    413 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    414 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    415 	ret <2 x i32> %tmp3
    416 }
    417 
    418 define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    419 ;CHECK-LABEL: sminp_4s:
    420 ;CHECK: sminp.4s
    421 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    422 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    423 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    424 	ret <4 x i32> %tmp3
    425 }
    426 
    427 declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    428 declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    429 declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    430 declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    431 declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    432 declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    433 
    434 define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    435 ;CHECK-LABEL: uminp_8b:
    436 ;CHECK: uminp.8b
    437 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    438 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    439 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    440 	ret <8 x i8> %tmp3
    441 }
    442 
    443 define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    444 ;CHECK-LABEL: uminp_16b:
    445 ;CHECK: uminp.16b
    446 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    447 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    448 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
    449 	ret <16 x i8> %tmp3
    450 }
    451 
    452 define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
    453 ;CHECK-LABEL: uminp_4h:
    454 ;CHECK: uminp.4h
    455 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    456 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    457 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
    458 	ret <4 x i16> %tmp3
    459 }
    460 
    461 define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    462 ;CHECK-LABEL: uminp_8h:
    463 ;CHECK: uminp.8h
    464 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    465 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    466 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
    467 	ret <8 x i16> %tmp3
    468 }
    469 
    470 define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
    471 ;CHECK-LABEL: uminp_2s:
    472 ;CHECK: uminp.2s
    473 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    474 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    475 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
    476 	ret <2 x i32> %tmp3
    477 }
    478 
    479 define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    480 ;CHECK-LABEL: uminp_4s:
    481 ;CHECK: uminp.4s
    482 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    483 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    484 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
    485 	ret <4 x i32> %tmp3
    486 }
    487 
    488 declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
    489 declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
    490 declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
    491 declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
    492 declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
    493 declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
    494 
    495 define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
    496 ;CHECK-LABEL: fmax_2s:
    497 ;CHECK: fmax.2s
    498 	%tmp1 = load <2 x float>, <2 x float>* %A
    499 	%tmp2 = load <2 x float>, <2 x float>* %B
    500 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
    501 	ret <2 x float> %tmp3
    502 }
    503 
    504 define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
    505 ;CHECK-LABEL: fmax_4s:
    506 ;CHECK: fmax.4s
    507 	%tmp1 = load <4 x float>, <4 x float>* %A
    508 	%tmp2 = load <4 x float>, <4 x float>* %B
    509 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
    510 	ret <4 x float> %tmp3
    511 }
    512 
    513 define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
    514 ;CHECK-LABEL: fmax_2d:
    515 ;CHECK: fmax.2d
    516 	%tmp1 = load <2 x double>, <2 x double>* %A
    517 	%tmp2 = load <2 x double>, <2 x double>* %B
    518 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
    519 	ret <2 x double> %tmp3
    520 }
    521 
    522 declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
    523 declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
    524 declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
    525 
    526 define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
    527 ;CHECK-LABEL: fmaxp_2s:
    528 ;CHECK: fmaxp.2s
    529 	%tmp1 = load <2 x float>, <2 x float>* %A
    530 	%tmp2 = load <2 x float>, <2 x float>* %B
    531 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
    532 	ret <2 x float> %tmp3
    533 }
    534 
    535 define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
    536 ;CHECK-LABEL: fmaxp_4s:
    537 ;CHECK: fmaxp.4s
    538 	%tmp1 = load <4 x float>, <4 x float>* %A
    539 	%tmp2 = load <4 x float>, <4 x float>* %B
    540 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
    541 	ret <4 x float> %tmp3
    542 }
    543 
    544 define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
    545 ;CHECK-LABEL: fmaxp_2d:
    546 ;CHECK: fmaxp.2d
    547 	%tmp1 = load <2 x double>, <2 x double>* %A
    548 	%tmp2 = load <2 x double>, <2 x double>* %B
    549 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
    550 	ret <2 x double> %tmp3
    551 }
    552 
    553 declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
    554 declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
    555 declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
    556 
    557 define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
    558 ;CHECK-LABEL: fmin_2s:
    559 ;CHECK: fmin.2s
    560 	%tmp1 = load <2 x float>, <2 x float>* %A
    561 	%tmp2 = load <2 x float>, <2 x float>* %B
    562 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
    563 	ret <2 x float> %tmp3
    564 }
    565 
    566 define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
    567 ;CHECK-LABEL: fmin_4s:
    568 ;CHECK: fmin.4s
    569 	%tmp1 = load <4 x float>, <4 x float>* %A
    570 	%tmp2 = load <4 x float>, <4 x float>* %B
    571 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
    572 	ret <4 x float> %tmp3
    573 }
    574 
    575 define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
    576 ;CHECK-LABEL: fmin_2d:
    577 ;CHECK: fmin.2d
    578 	%tmp1 = load <2 x double>, <2 x double>* %A
    579 	%tmp2 = load <2 x double>, <2 x double>* %B
    580 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
    581 	ret <2 x double> %tmp3
    582 }
    583 
    584 declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
    585 declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
    586 declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
    587 
    588 define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
    589 ;CHECK-LABEL: fminp_2s:
    590 ;CHECK: fminp.2s
    591 	%tmp1 = load <2 x float>, <2 x float>* %A
    592 	%tmp2 = load <2 x float>, <2 x float>* %B
    593 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
    594 	ret <2 x float> %tmp3
    595 }
    596 
    597 define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
    598 ;CHECK-LABEL: fminp_4s:
    599 ;CHECK: fminp.4s
    600 	%tmp1 = load <4 x float>, <4 x float>* %A
    601 	%tmp2 = load <4 x float>, <4 x float>* %B
    602 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
    603 	ret <4 x float> %tmp3
    604 }
    605 
    606 define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
    607 ;CHECK-LABEL: fminp_2d:
    608 ;CHECK: fminp.2d
    609 	%tmp1 = load <2 x double>, <2 x double>* %A
    610 	%tmp2 = load <2 x double>, <2 x double>* %B
    611 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
    612 	ret <2 x double> %tmp3
    613 }
    614 
    615 declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
    616 declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
    617 declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
    618 
    619 define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
    620 ;CHECK-LABEL: fminnmp_2s:
    621 ;CHECK: fminnmp.2s
    622 	%tmp1 = load <2 x float>, <2 x float>* %A
    623 	%tmp2 = load <2 x float>, <2 x float>* %B
    624 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
    625 	ret <2 x float> %tmp3
    626 }
    627 
    628 define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
    629 ;CHECK-LABEL: fminnmp_4s:
    630 ;CHECK: fminnmp.4s
    631 	%tmp1 = load <4 x float>, <4 x float>* %A
    632 	%tmp2 = load <4 x float>, <4 x float>* %B
    633 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
    634 	ret <4 x float> %tmp3
    635 }
    636 
    637 define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
    638 ;CHECK-LABEL: fminnmp_2d:
    639 ;CHECK: fminnmp.2d
    640 	%tmp1 = load <2 x double>, <2 x double>* %A
    641 	%tmp2 = load <2 x double>, <2 x double>* %B
    642 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
    643 	ret <2 x double> %tmp3
    644 }
    645 
    646 declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
    647 declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
    648 declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
    649 
    650 define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
    651 ;CHECK-LABEL: fmaxnmp_2s:
    652 ;CHECK: fmaxnmp.2s
    653 	%tmp1 = load <2 x float>, <2 x float>* %A
    654 	%tmp2 = load <2 x float>, <2 x float>* %B
    655 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
    656 	ret <2 x float> %tmp3
    657 }
    658 
    659 define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
    660 ;CHECK-LABEL: fmaxnmp_4s:
    661 ;CHECK: fmaxnmp.4s
    662 	%tmp1 = load <4 x float>, <4 x float>* %A
    663 	%tmp2 = load <4 x float>, <4 x float>* %B
    664 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
    665 	ret <4 x float> %tmp3
    666 }
    667 
    668 define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
    669 ;CHECK-LABEL: fmaxnmp_2d:
    670 ;CHECK: fmaxnmp.2d
    671 	%tmp1 = load <2 x double>, <2 x double>* %A
    672 	%tmp2 = load <2 x double>, <2 x double>* %B
    673 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
    674 	ret <2 x double> %tmp3
    675 }
    676 
    677 declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
    678 declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
    679 declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
    680