; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Codegen test for the AArch64 NEON vector min/max intrinsics.
;
; Every function below loads two vectors, calls one llvm.aarch64.neon.*
; intrinsic on them and returns the result. FileCheck then looks for the
; matching instruction mnemonic, including its Apple-syntax arrangement suffix
; (for example "smax.8b"), inside that function's llc output. Only the
; mnemonic is matched; operands and surrounding instructions are not checked.
;
; The single invocation line at the top covers every function in this file.

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.smax: signed integer maximum
;-----------------------------------------------------------------------------

define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: smax_8b:
; CHECK: smax.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: smax_16b:
; CHECK: smax.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: smax_4h:
; CHECK: smax.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: smax_8h:
; CHECK: smax.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: smax_2s:
; CHECK: smax.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: smax_4s:
; CHECK: smax.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.umax: unsigned integer maximum
;-----------------------------------------------------------------------------

define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: umax_8b:
; CHECK: umax.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: umax_16b:
; CHECK: umax.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: umax_4h:
; CHECK: umax.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: umax_8h:
; CHECK: umax.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: umax_2s:
; CHECK: umax.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: umax_4s:
; CHECK: umax.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.smin: signed integer minimum
;-----------------------------------------------------------------------------

define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: smin_8b:
; CHECK: smin.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: smin_16b:
; CHECK: smin.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: smin_4h:
; CHECK: smin.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: smin_8h:
; CHECK: smin.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: smin_2s:
; CHECK: smin.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: smin_4s:
; CHECK: smin.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.umin: unsigned integer minimum
;-----------------------------------------------------------------------------

define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: umin_8b:
; CHECK: umin.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: umin_16b:
; CHECK: umin.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: umin_4h:
; CHECK: umin.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: umin_8h:
; CHECK: umin.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: umin_2s:
; CHECK: umin.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: umin_4s:
; CHECK: umin.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.smaxp: signed integer pairwise maximum
;-----------------------------------------------------------------------------

define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: smaxp_8b:
; CHECK: smaxp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: smaxp_16b:
; CHECK: smaxp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: smaxp_4h:
; CHECK: smaxp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: smaxp_8h:
; CHECK: smaxp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: smaxp_2s:
; CHECK: smaxp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: smaxp_4s:
; CHECK: smaxp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.umaxp: unsigned integer pairwise maximum
;-----------------------------------------------------------------------------

define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: umaxp_8b:
; CHECK: umaxp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: umaxp_16b:
; CHECK: umaxp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: umaxp_4h:
; CHECK: umaxp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: umaxp_8h:
; CHECK: umaxp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: umaxp_2s:
; CHECK: umaxp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: umaxp_4s:
; CHECK: umaxp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.sminp: signed integer pairwise minimum
;-----------------------------------------------------------------------------

define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: sminp_8b:
; CHECK: sminp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: sminp_16b:
; CHECK: sminp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: sminp_4h:
; CHECK: sminp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: sminp_8h:
; CHECK: sminp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: sminp_2s:
; CHECK: sminp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: sminp_4s:
; CHECK: sminp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.uminp: unsigned integer pairwise minimum
;-----------------------------------------------------------------------------

define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: uminp_8b:
; CHECK: uminp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: uminp_16b:
; CHECK: uminp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: uminp_4h:
; CHECK: uminp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: uminp_8h:
; CHECK: uminp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: uminp_2s:
; CHECK: uminp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: uminp_4s:
; CHECK: uminp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.fmax: floating-point maximum
;-----------------------------------------------------------------------------

define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: fmax_2s:
; CHECK: fmax.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: fmax_4s:
; CHECK: fmax.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
; CHECK-LABEL: fmax_2d:
; CHECK: fmax.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.fmaxp: floating-point pairwise maximum
;-----------------------------------------------------------------------------

define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: fmaxp_2s:
; CHECK: fmaxp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: fmaxp_4s:
; CHECK: fmaxp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
; CHECK-LABEL: fmaxp_2d:
; CHECK: fmaxp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.fmin: floating-point minimum
;-----------------------------------------------------------------------------

define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: fmin_2s:
; CHECK: fmin.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: fmin_4s:
; CHECK: fmin.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
; CHECK-LABEL: fmin_2d:
; CHECK: fmin.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.fminp: floating-point pairwise minimum
;-----------------------------------------------------------------------------

define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: fminp_2s:
; CHECK: fminp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: fminp_4s:
; CHECK: fminp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
; CHECK-LABEL: fminp_2d:
; CHECK: fminp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.fminnmp: floating-point pairwise minimum, "nm" variant
;-----------------------------------------------------------------------------

define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: fminnmp_2s:
; CHECK: fminnmp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: fminnmp_4s:
; CHECK: fminnmp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
; CHECK-LABEL: fminnmp_2d:
; CHECK: fminnmp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone

;-----------------------------------------------------------------------------
; llvm.aarch64.neon.fmaxnmp: floating-point pairwise maximum, "nm" variant
;-----------------------------------------------------------------------------

define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: fmaxnmp_2s:
; CHECK: fmaxnmp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: fmaxnmp_4s:
; CHECK: fmaxnmp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
; CHECK-LABEL: fmaxnmp_2d:
; CHECK: fmaxnmp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone