; Regression test: AArch64 NEON max/min (integer, unsigned, FP, and the
; NaN-propagating *NM variants) are selected for the corresponding
; llvm.arm.neon.* / llvm.aarch64.neon.* intrinsics.
; CHECK-LABEL is used for every function anchor so that instruction CHECKs
; cannot accidentally match output belonging to a neighbouring function.
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 are possible, but would be odd.
; CHECK-LABEL: test_smax_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_umax_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_smax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_umax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_smax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_umax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}


declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_smax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_umax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}


declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_smax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_umax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_smax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_umax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 are possible, but would be odd.
; CHECK-LABEL: test_smin_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_umin_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_smin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_umin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_smin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_umin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}


declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_smin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_umin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}


declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_smin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_umin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_smin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_umin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: test_fmax_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmax v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: test_fmax_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmax v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: test_fmax_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmax v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: test_fmin_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmin v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: test_fmin_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmin v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: test_fmin_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmin v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}


declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: test_fmaxnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: test_fmaxnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: test_fmaxnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: test_fminnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: test_fminnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: test_fminnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}