; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_smaxp_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smaxp v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_umaxp_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umaxp v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smaxp_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smaxp v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umaxp_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umaxp v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smaxp_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smaxp v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umaxp_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umaxp v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smaxp_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smaxp v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umaxp_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umaxp v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smaxp_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smaxp v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umaxp_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umaxp v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smaxp_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smaxp v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umaxp_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umaxp v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}
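; Pairwise operations concatenate the two source vectors and apply the
; operation to each adjacent pair of elements of that concatenation, i.e.
; dst[i] = op(concat(lhs, rhs)[2*i], concat(lhs, rhs)[2*i+1]).
; The sminp/uminp tests below mirror the smaxp/umaxp tests above.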
declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_sminp_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sminp v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: test_uminp_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uminp v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_sminp_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sminp v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_uminp_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uminp v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_sminp_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sminp v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_uminp_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uminp v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_sminp_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sminp v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_uminp_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uminp v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_sminp_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sminp v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_uminp_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uminp v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_sminp_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sminp v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_uminp_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uminp v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}
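; fmaxp/fminp apply the same pairwise scheme to floating-point vectors and
; follow the fmax/fmin NaN rule: if either element of a pair is NaN, the
; result for that pair is NaN.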
declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxp_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxp v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxp_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxp v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxp_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxp v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminp_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminp v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminp_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminp v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminp_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminp v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}
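; fmaxnmp/fminnmp use the IEEE 754-2008 maxNum/minNum semantics instead:
; when exactly one element of a pair is a quiet NaN, the numeric element is
; returned rather than the NaN.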
declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxnmp_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxnmp_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxnmp_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminnmp_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminnmp v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminnmp_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminnmp v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminnmp_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminnmp v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}