; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

; Each function below feeds one small IR pattern to llc and expects a single
; named NEON instruction (with the given vector arrangements) in the output.
; Register numbers are matched loosely with {{v[0-9]+}}.
;
; Layout note: ';' comments run to the end of the line, so every directive and
; every instruction must stay on its own line for the test to be meaningful.

; Intrinsic declarations. Only the raddhn intrinsics are called by the
; functions in this part of the file; the remaining ones are presumably used
; by tests further down (not verified here).
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)
declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)
declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)

; --- vaddl: both 64-bit operands are extended (sext for _s*, zext for _u*)
; --- and then added; the expected instruction is saddl / uaddl.

define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_s8:
; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_s16:
; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_s32:
; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_u8:
; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_u16:
; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_u32:
; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %add.i
}

; --- vaddl_high: as vaddl, but each operand is first narrowed to the upper
; --- half of a 128-bit input by a shufflevector; expected saddl2 / uaddl2.

define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_s8:
; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %add.i = add <8 x i16> %0, %1
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_s16:
; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %add.i = add <4 x i32> %0, %1
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_s32:
; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %add.i = add <2 x i64> %0, %1
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_u8:
; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %add.i = add <8 x i16> %0, %1
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_u16:
; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %add.i = add <4 x i32> %0, %1
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_u32:
; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %add.i = add <2 x i64> %0, %1
  ret <2 x i64> %add.i
}

; --- vaddw: %a is already wide; only %b is extended before the add.
; --- Expected saddw / uaddw.

define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_s8:
; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_s16:
; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_s32:
; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %a
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_u8:
; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_u16:
; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_u32:
; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %a
  ret <2 x i64> %add.i
}

; --- vaddw_high: as vaddw, but the narrow operand is the upper half of a
; --- 128-bit %b. Expected saddw2 / uaddw2.

define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_s8:
; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %add.i = add <8 x i16> %0, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_s16:
; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %add.i = add <4 x i32> %0, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_s32:
; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %add.i = add <2 x i64> %0, %a
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_u8:
; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %add.i = add <8 x i16> %0, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_u16:
; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %add.i = add <4 x i32> %0, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_u32:
; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %add.i = add <2 x i64> %0, %a
  ret <2 x i64> %add.i
}

; --- vsubl: both 64-bit operands are extended, then %a - %b is computed.
; --- Expected ssubl / usubl.

define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_s8:
; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_s16:
; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_s32:
; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_u8:
; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_u16:
; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_u32:
; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %sub.i
}

; --- vsubl_high: as vsubl, on the upper halves of two 128-bit inputs.
; --- Expected ssubl2 / usubl2.

define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_s8:
; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %sub.i = sub <8 x i16> %0, %1
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_s16:
; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %sub.i = sub <4 x i32> %0, %1
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_s32:
; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %sub.i = sub <2 x i64> %0, %1
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_u8:
; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %sub.i = sub <8 x i16> %0, %1
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_u16:
; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %sub.i = sub <4 x i32> %0, %1
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_u32:
; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %sub.i = sub <2 x i64> %0, %1
  ret <2 x i64> %sub.i
}

; --- vsubw: the extended %b is subtracted from the already-wide %a.
; --- Expected ssubw / usubw.

define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_s8:
; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %a, %vmovl.i.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_s16:
; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %a, %vmovl.i.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_s32:
; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %a, %vmovl.i.i
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_u8:
; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
entry:
  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %a, %vmovl.i.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_u16:
; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
entry:
  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %a, %vmovl.i.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_u32:
; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
entry:
  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %a, %vmovl.i.i
  ret <2 x i64> %sub.i
}

; --- vsubw_high: as vsubw, with the narrow operand taken from the upper half
; --- of a 128-bit %b. Expected ssubw2 / usubw2.

define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_s8:
; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %sub.i = sub <8 x i16> %a, %0
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_s16:
; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %sub.i = sub <4 x i32> %a, %0
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_s32:
; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %sub.i = sub <2 x i64> %a, %0
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_u8:
; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %sub.i = sub <8 x i16> %a, %0
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_u16:
; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %sub.i = sub <4 x i32> %a, %0
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_u32:
; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %sub.i = sub <2 x i64> %a, %0
  ret <2 x i64> %sub.i
}

; --- vaddhn: add, logical shift right by half the element width, truncate to
; --- the narrow type. Expected addhn. The _s* and _u* variants use identical
; --- IR and expect the same instruction.

define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_s16:
; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vaddhn.i = add <8 x i16> %a, %b
  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
  ret <8 x i8> %vaddhn2.i
}

define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_s32:
; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vaddhn.i = add <4 x i32> %a, %b
  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
  ret <4 x i16> %vaddhn2.i
}

define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_s64:
; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vaddhn.i = add <2 x i64> %a, %b
  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
  ret <2 x i32> %vaddhn2.i
}

define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_u16:
; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vaddhn.i = add <8 x i16> %a, %b
  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
  ret <8 x i8> %vaddhn2.i
}

define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_u32:
; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vaddhn.i = add <4 x i32> %a, %b
  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
  ret <4 x i16> %vaddhn2.i
}

define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_u64:
; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vaddhn.i = add <2 x i64> %a, %b
  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
  ret <2 x i32> %vaddhn2.i
}

; --- vaddhn_high: the vaddhn pattern, with the narrowed result placed in the
; --- second 64-bit lane of the return value and %r in the first (done through
; --- <1 x i64> bitcasts and a two-element shufflevector). Expected addhn2.

define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_high_s16:
; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vaddhn.i.i = add <8 x i16> %a, %b
  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}

define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_high_s32:
; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vaddhn.i.i = add <4 x i32> %a, %b
  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}

define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_high_s64:
; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vaddhn.i.i = add <2 x i64> %a, %b
  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}

define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddhn_high_u16:
; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vaddhn.i.i = add <8 x i16> %a, %b
  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}

define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddhn_high_u32:
; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vaddhn.i.i = add <4 x i32> %a, %b
  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}

define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vaddhn_high_u64:
; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vaddhn.i.i = add <2 x i64> %a, %b
  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}

; --- vraddhn: a direct call to the llvm.aarch64.neon.raddhn intrinsic.
; --- Expected raddhn. The _s* and _u* variants call the same intrinsic.

define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_s16:
; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i8> %vraddhn2.i
}

define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_s32:
; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i16> %vraddhn2.i
}

define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_s64:
; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i32> %vraddhn2.i
}

define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_u16:
; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i8> %vraddhn2.i
}

define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_u32:
; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i16> %vraddhn2.i
}

define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_u64:
; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i32> %vraddhn2.i
}

; --- vraddhn_high: the raddhn intrinsic result is placed in the second 64-bit
; --- lane of the return value, with %r in the first. Expected raddhn2.

define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_high_s16:
; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}

define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_high_s32:
; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}

define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_high_s64:
; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}

define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vraddhn_high_u16:
; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
  %0 = bitcast <8 x i8> %r to <1 x i64>
  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
  ret <16 x i8> %2
}

define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vraddhn_high_u32:
; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
  %0 = bitcast <4 x i16> %r to <1 x i64>
  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
  ret <8 x i16> %2
}

define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vraddhn_high_u64:
; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
  %0 = bitcast <2 x i32> %r to <1 x i64>
  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubhn_s16:
; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %vsubhn.i = sub <8 x i16> %a, %b
  %vsubhn1.i = lshr <8 x i16>
%vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 815 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> 816 ret <8 x i8> %vsubhn2.i 817 } 818 819 define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { 820 ; CHECK-LABEL: test_vsubhn_s32: 821 ; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 822 entry: 823 %vsubhn.i = sub <4 x i32> %a, %b 824 %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> 825 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> 826 ret <4 x i16> %vsubhn2.i 827 } 828 829 define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { 830 ; CHECK-LABEL: test_vsubhn_s64: 831 ; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 832 entry: 833 %vsubhn.i = sub <2 x i64> %a, %b 834 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> 835 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> 836 ret <2 x i32> %vsubhn2.i 837 } 838 839 define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { 840 ; CHECK-LABEL: test_vsubhn_u16: 841 ; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 842 entry: 843 %vsubhn.i = sub <8 x i16> %a, %b 844 %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 845 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> 846 ret <8 x i8> %vsubhn2.i 847 } 848 849 define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { 850 ; CHECK-LABEL: test_vsubhn_u32: 851 ; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 852 entry: 853 %vsubhn.i = sub <4 x i32> %a, %b 854 %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> 855 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> 856 ret <4 x i16> %vsubhn2.i 857 } 858 859 define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { 860 ; CHECK-LABEL: test_vsubhn_u64: 861 ; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 862 entry: 863 %vsubhn.i = sub <2 x i64> %a, %b 864 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, 
i64 32> 865 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> 866 ret <2 x i32> %vsubhn2.i 867 } 868 869 define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 870 ; CHECK-LABEL: test_vsubhn_high_s16: 871 ; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 872 entry: 873 %vsubhn.i.i = sub <8 x i16> %a, %b 874 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 875 %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> 876 %0 = bitcast <8 x i8> %r to <1 x i64> 877 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> 878 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 879 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 880 ret <16 x i8> %2 881 } 882 883 define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 884 ; CHECK-LABEL: test_vsubhn_high_s32: 885 ; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 886 entry: 887 %vsubhn.i.i = sub <4 x i32> %a, %b 888 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> 889 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> 890 %0 = bitcast <4 x i16> %r to <1 x i64> 891 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> 892 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 893 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 894 ret <8 x i16> %2 895 } 896 897 define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 898 ; CHECK-LABEL: test_vsubhn_high_s64: 899 ; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 900 entry: 901 %vsubhn.i.i = sub <2 x i64> %a, %b 902 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> 903 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> 904 %0 = bitcast <2 x i32> %r to <1 x i64> 905 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> 906 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 907 %2 = bitcast 
<2 x i64> %shuffle.i.i to <4 x i32> 908 ret <4 x i32> %2 909 } 910 911 define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 912 ; CHECK-LABEL: test_vsubhn_high_u16: 913 ; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 914 entry: 915 %vsubhn.i.i = sub <8 x i16> %a, %b 916 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 917 %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> 918 %0 = bitcast <8 x i8> %r to <1 x i64> 919 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> 920 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 921 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 922 ret <16 x i8> %2 923 } 924 925 define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 926 ; CHECK-LABEL: test_vsubhn_high_u32: 927 ; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 928 entry: 929 %vsubhn.i.i = sub <4 x i32> %a, %b 930 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> 931 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> 932 %0 = bitcast <4 x i16> %r to <1 x i64> 933 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> 934 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 935 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 936 ret <8 x i16> %2 937 } 938 939 define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 940 ; CHECK-LABEL: test_vsubhn_high_u64: 941 ; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 942 entry: 943 %vsubhn.i.i = sub <2 x i64> %a, %b 944 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> 945 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> 946 %0 = bitcast <2 x i32> %r to <1 x i64> 947 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> 948 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 949 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 
950 ret <4 x i32> %2 951 } 952 953 define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { 954 ; CHECK-LABEL: test_vrsubhn_s16: 955 ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 956 entry: 957 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 958 ret <8 x i8> %vrsubhn2.i 959 } 960 961 define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { 962 ; CHECK-LABEL: test_vrsubhn_s32: 963 ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 964 entry: 965 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 966 ret <4 x i16> %vrsubhn2.i 967 } 968 969 define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { 970 ; CHECK-LABEL: test_vrsubhn_s64: 971 ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 972 entry: 973 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 974 ret <2 x i32> %vrsubhn2.i 975 } 976 977 define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { 978 ; CHECK-LABEL: test_vrsubhn_u16: 979 ; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 980 entry: 981 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 982 ret <8 x i8> %vrsubhn2.i 983 } 984 985 define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { 986 ; CHECK-LABEL: test_vrsubhn_u32: 987 ; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 988 entry: 989 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 990 ret <4 x i16> %vrsubhn2.i 991 } 992 993 define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { 994 ; CHECK-LABEL: test_vrsubhn_u64: 995 ; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 996 entry: 997 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 998 ret <2 x i32> %vrsubhn2.i 999 } 1000 1001 define <16 x i8> 
@test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1002 ; CHECK-LABEL: test_vrsubhn_high_s16: 1003 ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1004 entry: 1005 %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1006 %0 = bitcast <8 x i8> %r to <1 x i64> 1007 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> 1008 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1009 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1010 ret <16 x i8> %2 1011 } 1012 1013 define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1014 ; CHECK-LABEL: test_vrsubhn_high_s32: 1015 ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1016 entry: 1017 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1018 %0 = bitcast <4 x i16> %r to <1 x i64> 1019 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> 1020 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1021 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1022 ret <8 x i16> %2 1023 } 1024 1025 define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1026 ; CHECK-LABEL: test_vrsubhn_high_s64: 1027 ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1028 entry: 1029 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1030 %0 = bitcast <2 x i32> %r to <1 x i64> 1031 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> 1032 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1033 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1034 ret <4 x i32> %2 1035 } 1036 1037 define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1038 ; CHECK-LABEL: test_vrsubhn_high_u16: 1039 ; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1040 entry: 1041 %vrsubhn2.i.i = tail call <8 x i8> 
@llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1042 %0 = bitcast <8 x i8> %r to <1 x i64> 1043 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> 1044 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1045 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1046 ret <16 x i8> %2 1047 } 1048 1049 define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1050 ; CHECK-LABEL: test_vrsubhn_high_u32: 1051 ; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1052 entry: 1053 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1054 %0 = bitcast <4 x i16> %r to <1 x i64> 1055 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> 1056 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1057 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1058 ret <8 x i16> %2 1059 } 1060 1061 define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1062 ; CHECK-LABEL: test_vrsubhn_high_u64: 1063 ; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1064 entry: 1065 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1066 %0 = bitcast <2 x i32> %r to <1 x i64> 1067 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> 1068 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1069 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1070 ret <4 x i32> %2 1071 } 1072 1073 define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { 1074 ; CHECK-LABEL: test_vabdl_s8: 1075 ; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1076 entry: 1077 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) 1078 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> 1079 ret <8 x i16> %vmovl.i.i 1080 } 1081 1082 define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { 1083 ; CHECK-LABEL: test_vabdl_s16: 1084 ; CHECK: sabdl 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1085 entry: 1086 %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) 1087 %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> 1088 ret <4 x i32> %vmovl.i.i 1089 } 1090 1091 define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { 1092 ; CHECK-LABEL: test_vabdl_s32: 1093 ; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1094 entry: 1095 %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) 1096 %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> 1097 ret <2 x i64> %vmovl.i.i 1098 } 1099 1100 define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { 1101 ; CHECK-LABEL: test_vabdl_u8: 1102 ; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1103 entry: 1104 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) 1105 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> 1106 ret <8 x i16> %vmovl.i.i 1107 } 1108 1109 define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { 1110 ; CHECK-LABEL: test_vabdl_u16: 1111 ; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1112 entry: 1113 %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) 1114 %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> 1115 ret <4 x i32> %vmovl.i.i 1116 } 1117 1118 define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) { 1119 ; CHECK-LABEL: test_vabdl_u32: 1120 ; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1121 entry: 1122 %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) 1123 %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> 1124 ret <2 x i64> %vmovl.i.i 1125 } 1126 1127 define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { 1128 ; CHECK-LABEL: test_vabal_s8: 1129 ; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1130 entry: 1131 %vabd.i.i.i = tail call <8 x i8> 
@llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) 1132 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1133 %add.i = add <8 x i16> %vmovl.i.i.i, %a 1134 ret <8 x i16> %add.i 1135 } 1136 1137 define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { 1138 ; CHECK-LABEL: test_vabal_s16: 1139 ; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1140 entry: 1141 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) 1142 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1143 %add.i = add <4 x i32> %vmovl.i.i.i, %a 1144 ret <4 x i32> %add.i 1145 } 1146 1147 define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { 1148 ; CHECK-LABEL: test_vabal_s32: 1149 ; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1150 entry: 1151 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) 1152 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1153 %add.i = add <2 x i64> %vmovl.i.i.i, %a 1154 ret <2 x i64> %add.i 1155 } 1156 1157 define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { 1158 ; CHECK-LABEL: test_vabal_u8: 1159 ; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1160 entry: 1161 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) 1162 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1163 %add.i = add <8 x i16> %vmovl.i.i.i, %a 1164 ret <8 x i16> %add.i 1165 } 1166 1167 define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { 1168 ; CHECK-LABEL: test_vabal_u16: 1169 ; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1170 entry: 1171 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) 1172 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1173 %add.i = add <4 x i32> %vmovl.i.i.i, %a 1174 ret <4 x i32> %add.i 1175 } 1176 1177 define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x 
i32> %b, <2 x i32> %c) { 1178 ; CHECK-LABEL: test_vabal_u32: 1179 ; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1180 entry: 1181 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) 1182 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1183 %add.i = add <2 x i64> %vmovl.i.i.i, %a 1184 ret <2 x i64> %add.i 1185 } 1186 1187 define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) { 1188 ; CHECK-LABEL: test_vabdl_high_s8: 1189 ; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1190 entry: 1191 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1192 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1193 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1194 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1195 ret <8 x i16> %vmovl.i.i.i 1196 } 1197 1198 define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) { 1199 ; CHECK-LABEL: test_vabdl_high_s16: 1200 ; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1201 entry: 1202 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1203 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1204 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1205 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1206 ret <4 x i32> %vmovl.i.i.i 1207 } 1208 1209 define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) { 1210 ; CHECK-LABEL: test_vabdl_high_s32: 1211 ; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1212 entry: 1213 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1214 %shuffle.i3.i 
= shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1215 %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1216 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1217 ret <2 x i64> %vmovl.i.i.i 1218 } 1219 1220 define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) { 1221 ; CHECK-LABEL: test_vabdl_high_u8: 1222 ; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1223 entry: 1224 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1225 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1226 %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1227 %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> 1228 ret <8 x i16> %vmovl.i.i.i 1229 } 1230 1231 define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) { 1232 ; CHECK-LABEL: test_vabdl_high_u16: 1233 ; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1234 entry: 1235 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1236 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1237 %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1238 %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> 1239 ret <4 x i32> %vmovl.i.i.i 1240 } 1241 1242 define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) { 1243 ; CHECK-LABEL: test_vabdl_high_u32: 1244 ; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1245 entry: 1246 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1247 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1248 
%vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1249 %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> 1250 ret <2 x i64> %vmovl.i.i.i 1251 } 1252 1253 define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { 1254 ; CHECK-LABEL: test_vabal_high_s8: 1255 ; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1256 entry: 1257 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1258 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1259 %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1260 %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> 1261 %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a 1262 ret <8 x i16> %add.i.i 1263 } 1264 1265 define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { 1266 ; CHECK-LABEL: test_vabal_high_s16: 1267 ; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1268 entry: 1269 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1270 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1271 %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1272 %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> 1273 %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a 1274 ret <4 x i32> %add.i.i 1275 } 1276 1277 define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { 1278 ; CHECK-LABEL: test_vabal_high_s32: 1279 ; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1280 entry: 1281 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1282 %shuffle.i3.i = shufflevector <4 
x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1283 %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1284 %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> 1285 %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a 1286 ret <2 x i64> %add.i.i 1287 } 1288 1289 define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { 1290 ; CHECK-LABEL: test_vabal_high_u8: 1291 ; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1292 entry: 1293 %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1294 %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1295 %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1296 %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> 1297 %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a 1298 ret <8 x i16> %add.i.i 1299 } 1300 1301 define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { 1302 ; CHECK-LABEL: test_vabal_high_u16: 1303 ; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1304 entry: 1305 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1306 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1307 %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1308 %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> 1309 %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a 1310 ret <4 x i32> %add.i.i 1311 } 1312 1313 define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { 1314 ; CHECK-LABEL: test_vabal_high_u32: 1315 ; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1316 entry: 1317 %shuffle.i.i 
= shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1318 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1319 %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 1320 %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> 1321 %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a 1322 ret <2 x i64> %add.i.i 1323 } 1324 1325 define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) { 1326 ; CHECK-LABEL: test_vmull_s8: 1327 ; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1328 entry: 1329 %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) 1330 ret <8 x i16> %vmull.i 1331 } 1332 1333 define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) { 1334 ; CHECK-LABEL: test_vmull_s16: 1335 ; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1336 entry: 1337 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) 1338 ret <4 x i32> %vmull2.i 1339 } 1340 1341 define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) { 1342 ; CHECK-LABEL: test_vmull_s32: 1343 ; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1344 entry: 1345 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) 1346 ret <2 x i64> %vmull2.i 1347 } 1348 1349 define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) { 1350 ; CHECK-LABEL: test_vmull_u8: 1351 ; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 1352 entry: 1353 %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) 1354 ret <8 x i16> %vmull.i 1355 } 1356 1357 define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) { 1358 ; CHECK-LABEL: test_vmull_u16: 1359 ; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 1360 entry: 1361 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) 1362 ret <4 x i32> 
%vmull2.i 1363 } 1364 1365 define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) { 1366 ; CHECK-LABEL: test_vmull_u32: 1367 ; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s 1368 entry: 1369 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) 1370 ret <2 x i64> %vmull2.i 1371 } 1372 1373 define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) { 1374 ; CHECK-LABEL: test_vmull_high_s8: 1375 ; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 1376 entry: 1377 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1378 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1379 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 1380 ret <8 x i16> %vmull.i.i 1381 } 1382 1383 define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) { 1384 ; CHECK-LABEL: test_vmull_high_s16: 1385 ; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h 1386 entry: 1387 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1388 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1389 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 1390 ret <4 x i32> %vmull2.i.i 1391 } 1392 1393 define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) { 1394 ; CHECK-LABEL: test_vmull_high_s32: 1395 ; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s 1396 entry: 1397 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1398 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1399 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> 
%shuffle.i.i, <2 x i32> %shuffle.i3.i)
  ret <2 x i64> %vmull2.i.i
}

; ---- Unsigned widening multiply on the upper halves -------------------------
; Both shuffles keep only the top half of the lanes of a full-width operand.
; Passing those halves to the umull intrinsic is expected to select one umull2.

define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_u8:
; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  ; Lanes 8..15 of each input.
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a.hi, <8 x i8> %b.hi)
  ret <8 x i16> %prod
}

define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmull_high_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  ; Lanes 4..7 of each input.
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a.hi, <4 x i16> %b.hi)
  ret <4 x i32> %prod
}

define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmull_high_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  ; Lanes 2..3 of each input.
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a.hi, <2 x i32> %b.hi)
  ret <2 x i64> %prod
}

; ---- Multiply-accumulate long ------------------------------------------------
; A widening multiply of %b and %c whose result is added to %a; the add is
; expected to fold into the multiply, giving one smlal (signed) or umlal
; (unsigned).

define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_s8:
; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %prod = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}

define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_s16:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}

define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_s32:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}

define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_u8:
; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}

define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}

define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}

; ---- Multiply-accumulate long, upper halves ---------------------------------
; Same accumulate shape as above, but the multiply operands are the top-half
; lanes of %b and %c; expected output is smlal2 / umlal2.

define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_s8:
; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}

define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_s16:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}

define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_s32:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}

define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_u8:
; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}

define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}

define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %acc = add <2 x i64> %prod, %a
  ret <2 x i64> %acc
}

; ---- Multiply-subtract long --------------------------------------------------
; The widened product of %b and %c is subtracted from %a (note the operand
; order of the sub); expected output is one smlsl / umlsl.

define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_s8:
; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %prod = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}

define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_s16:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}

define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_s32:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}

define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_u8:
; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}

define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}

define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}

; ---- Multiply-subtract long, upper halves -----------------------------------
; Subtract-from-%a shape again, with the multiply fed by the top-half lanes of
; %b and %c; expected output is smlsl2 / umlsl2.

define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_s8:
; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}

define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}

define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}

define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_u8:
; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %prod = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %diff = sub <8 x i16> %a, %prod
  ret <8 x i16> %diff
}

define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %prod = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %diff = sub <4 x i32> %a, %prod
  ret <4 x i32> %diff
}

define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %diff = sub <2 x i64> %a, %prod
  ret <2 x i64> %diff
}

; ---- Saturating doubling multiply long ---------------------------------------
; A direct call to the sqdmull intrinsic on the two arguments; expected output
; is one sqdmull.

define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vqdmull_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %res = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %res
}

define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vqdmull_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %res
}

; ---- Saturating doubling multiply-accumulate / multiply-subtract long -------
; The sqdmull result is combined with %a through the sqadd (or sqsub)
; intrinsic; the pair is expected to select one sqdmlal (or sqdmlsl).

define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_s16:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %acc = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %prod)
  ret <4 x i32> %acc
}

define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_s32:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %prod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %acc = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %prod)
  ret <2 x i64> %acc
}

define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_s16:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %prod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %diff = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %prod)
  ret <4 x i32> %diff
}

; Saturating doubling multiply-subtract long, 32-bit lanes: sqdmull of %b and
; %c, then sqsub from %a; the pair is expected to select one sqdmlsl.
define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_s32:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; Upper-half variants: each shufflevector keeps only the top half of the lanes
; of its operand before the sqdmull call; expected output is the "2" form of
; the instruction (sqdmull2 / sqdmlal2 / sqdmlsl2).

define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmull_high_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vqdmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  ret <4 x i32> %vqdmull2.i.i
}

define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmull_high_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vqdmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  ret <2 x i64> %vqdmull2.i.i
}

define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_high_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vqdmlal4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
  ret <4 x i32> %vqdmlal4.i.i
}

define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_high_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vqdmlal2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vqdmlal4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
  ret <2 x i64> %vqdmlal4.i.i
}

define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vqdmlsl2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
  %vqdmlsl4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
  ret <4 x i32> %vqdmlsl4.i.i
}

define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vqdmlsl_high_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vqdmlsl2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
  %vqdmlsl4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
  ret <2 x i64> %vqdmlsl4.i.i
}

; Polynomial widening multiply on 8-bit lanes: a direct call to the pmull
; intrinsic; expected output is one pmull producing .8h.
define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_p8:
; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i16> %vmull.i
}

; Same intrinsic fed with lanes 8..15 of each 16-byte operand; expected output
; is pmull2.
define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_p8:
; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  ret <8 x i16> %vmull.i.i
}

; NOTE(review): the attribute group references #1, #4 and #5 used below have no
; `attributes #N = { ... }` definition in the visible part of this file.
; Confirm they are defined elsewhere, or drop the references.

; 64-bit polynomial multiply of two scalar i64 arguments through the pmull64
; intrinsic; the <16 x i8> result is bitcast to i128 for the return. The label
; directive carries the trailing ':' like every other test in this file, so it
; anchors on the symbol definition instead of any earlier mention of the name.
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
; CHECK-LABEL: test_vmull_p64:
; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
entry:
  %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
  %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
  ret i128 %vmull3.i
}

; Upper-lane variant: element 1 of each <2 x i64> argument is extracted and
; passed to pmull64; expected output is pmull2 reading the .2d registers.
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
; CHECK-LABEL: test_vmull_high_p64:
; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %0 = extractelement <2 x i64> %a, i32 1
  %1 = extractelement <2 x i64> %b, i32 1
  %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1) #1
  %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
  ret i128 %vmull3.i.i
}

declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5